Skip to content

Commit

Permalink
Fix ins/dups where splice region is preserved
Browse files Browse the repository at this point in the history
  • Loading branch information
b0d0nne11 committed Sep 14, 2024
1 parent e6dbd1e commit 4983d11
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 2 deletions.
57 changes: 56 additions & 1 deletion src/hgvs/assemblymapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
HGVSUnsupportedOperationError,
)
from hgvs.variantmapper import VariantMapper
from hgvs.posedit import PosEdit
from hgvs.edit import NARefAlt
from hgvs.location import SimplePosition, Interval

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -172,7 +175,31 @@ def n_to_c(self, var_n):
return self._maybe_normalize(var_out)

def c_to_p(self, var_c):
    """Project a c. variant onto its protein, retrying shifted ins/dup forms.

    The variant is first projected with the parent class's ``_c_to_p``.
    When that projection has no ``posedit`` and ``var_c`` is an ``ins`` or
    ``dup`` whose start or end position has a non-zero offset, the variant
    is mapped to the genome, shifted as far as possible in the 3' and then
    the 5' direction, mapped back to the transcript and projected again.
    The first shifted projection that has a ``posedit`` is used.

    :param var_c: the c. variant to project
    :returns: the projected p. variant, passed through ``_maybe_normalize``
    """
    # Call the un-guarded ``_c_to_p`` exactly once.  The public parent
    # ``c_to_p`` must not be called here: its result would be overwritten
    # anyway, and it may raise before the shifted retry below gets a chance.
    var_out = super(AssemblyMapper, self)._c_to_p(var_c)

    needs_shift_retry = (
        var_c.posedit.edit.type in ['ins', 'dup']
        and var_c.type in "cnr"
        and var_c.posedit.pos is not None
        and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
        and var_out.posedit is None
    )

    if needs_shift_retry:
        try:
            var_g = self.c_to_g(var_c)
            strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand

            for shuffle_direction in [3, 5]:
                shifted_var_g = self._far_shift(var_g, shuffle_direction, strand)
                shifted_var_c = super(AssemblyMapper, self).g_to_c(
                    shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
                )
                var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c)

                if var_out.posedit is not None:
                    break
        except HGVSInvalidVariantError as exc:
            # Best effort: keep whatever projection we already have, but
            # leave a trace instead of discarding the failure silently.
            _logger.debug("c_to_p: shifted re-projection of %s failed: %s", var_c, exc)

    return self._maybe_normalize(var_out)

def relevant_transcripts(self, var_g):
Expand Down Expand Up @@ -268,6 +295,34 @@ def _maybe_normalize(self, var):
# fall through to return unnormalized variant
return var

def _far_shift(self, var_g, shuffle_direction, strand):
    """Shift a genomic variant fully in one direction.

    The variant is normalized with a non-validating normalizer configured
    for ``shuffle_direction``.  If the normalized result is a duplication,
    its reference is filled in and it is rewritten as an insertion of that
    reference sequence between two adjacent bases, so that the change is
    shifted as far as possible.

    :param var_g: the g. variant to shift
    :param shuffle_direction: shuffle direction handed to the normalizer
    :param strand: transcript strand (1 or -1); together with
        ``shuffle_direction`` it selects which end of the duplicated span
        the insertion is placed at
    :returns: the shifted variant
    """
    shifter = hgvs.normalizer.Normalizer(
        self._norm.hdp,
        alt_aln_method=self.alt_aln_method,
        validate=False,
        shuffle_direction=shuffle_direction,
    )
    shifted = shifter.normalize(var_g)

    # Anything other than a duplication is returned as normalized.
    if shifted.posedit.edit.type != 'dup':
        return shifted

    self._replace_reference(shifted)
    span = shifted.posedit.pos

    place_before_start = (strand == 1 and shuffle_direction == 3) or (
        strand == -1 and shuffle_direction == 5
    )
    if place_before_start:
        # Insertion sits between (start - 1) and start of the duplicated span.
        left_base = span.start.base - 1
    else:
        # Insertion sits between end and (end + 1) of the duplicated span.
        left_base = span.end.base

    shifted.posedit = PosEdit(
        pos=Interval(
            start=SimplePosition(base=left_base),
            end=SimplePosition(base=left_base + 1),
        ),
        edit=NARefAlt(ref=None, alt=shifted.posedit.edit.ref),
    )
    return shifted


# <LICENSE>
# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
Expand Down
15 changes: 15 additions & 0 deletions src/hgvs/variantmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,21 @@ def c_to_p(self, var_c, pro_ac=None):
"""

var_p = self._c_to_p(var_c, pro_ac=None)

if (
var_c.posedit.edit.type in ['ins', 'dup']
and var_c.type in "cnr"
and var_c.posedit.pos is not None
and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
and var_p.posedit is None
):
raise HGVSUnsupportedOperationError('c_to_p not supported on VariantMapper for this var_c, try AssemblyMapper')

return var_p


def _c_to_p(self, var_c, pro_ac=None):
if not (var_c.type == "c"):
raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))
if self._validator:
Expand Down
1 change: 0 additions & 1 deletion tests/data/gcp/real.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,5 @@ ID00056 NC_000010.10:g.89693009delG NM_000314.4:c.492+1delG NP_000305.3:p.?
ID00057 NC_000010.10:g.89711873A>C NM_000314.4:c.493-2A>C NP_000305.3:p.?
ID00058 NC_000010.10:g.89717676G>A NM_000314.4:c.701G>A NP_000305.3:p.(Arg234Gln)
ID00059 NC_000010.10:g.89717777G>A NM_000314.4:c.801+1G>A NP_000305.3:p.?
ID00060 NC_000010.10:g.89720648dupT NM_000314.4:c.802-3dupT NP_000305.3:p.?
ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.Met1?
ID00062 NC_000005.9:g.131706014G>A NM_003060.3:c.350G>A NP_003051.1:p.(Trp117*)
61 changes: 61 additions & 0 deletions tests/issues/test_714.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os

import hgvs
import pytest
from support import CACHE

# Each row is (name, var_c, expected var_p).  Rows are expanded into dicts
# keyed by _CASE_KEYS, which is the shape the parametrized test reads.
_CASE_KEYS = ("name", "var_c", "var_p")
_DUP_CASE_NAME = "dup with splice region preserved"

cases = [
    dict(zip(_CASE_KEYS, row))
    for row in (
        (
            "ins with splice region preserved",
            "NM_004119.2:c.1837+21_1837+22insCGAGAGAATATGAATATGATCTCAAATGGGAGTTTCCAAGAGAAAATTTAGAGTTTGGTAAGAATGGAATGTGCCAAA",
            "NP_004110.2:p.(Lys614_Val615insAsnGlyMetCysGlnThrArgGluTyrGluTyrAspLeuLysTrpGluPheProArgGluAsnLeuGluPheGlyLys)",
        ),
        (
            _DUP_CASE_NAME,
            "NM_004119.2:c.1835_1837+3dup",
            "NP_004110.2:p.(Gly613_Lys614insIleGly)",
        ),
        (
            _DUP_CASE_NAME,
            "NM_005228.4:c.2284-5_2290dup",
            "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)",
        ),
        (
            _DUP_CASE_NAME,
            "NM_004456.4:c.2196-1_2196dup",
            "NP_004447.2:p.(Tyr733AspfsTer8)",
        ),
        (
            _DUP_CASE_NAME,
            "NM_016222.3:c.27+2_27+5dup",
            "NP_057306.2:p.(Arg10ValfsTer20)",
        ),
        (
            _DUP_CASE_NAME,
            "NM_182758.2:c.2953-31_2953-26dup",
            "NP_877435.2:p.?",
        ),
    )
]


@pytest.fixture(scope="module")
def hp():
    """Provide one HGVS parser instance per test module."""
    parser = hgvs.parser.Parser()
    return parser


@pytest.fixture(scope="module")
def hdp():
    """Provide one UTA data-provider connection per test module.

    The cache mode is read from the ``HGVS_CACHE_MODE`` environment
    variable and defaults to ``"run"``; the cache itself is ``CACHE``.
    """
    cache_mode = os.environ.get("HGVS_CACHE_MODE", "run")
    return hgvs.dataproviders.uta.connect(mode=cache_mode, cache=CACHE)


@pytest.fixture(scope="module")
def am37(hdp):
    """Provide one GRCh37 assembly mapper per test module, built on ``hdp``."""
    mapper = hgvs.assemblymapper.AssemblyMapper(hdp, assembly_name="GRCh37")
    return mapper


@pytest.mark.parametrize("case", cases)
def test_real_c_to_p(case, hp, am37):
    """Check that each case's c. variant projects to its expected p. string."""
    projected = am37.c_to_p(hp.parse(case["var_c"]))
    observed = str(projected)
    assert observed == case["var_p"]
3 changes: 3 additions & 0 deletions tests/support/mock_input_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def get_tx_seq(self, ac):
def get_seq(self, ac, start_i=None, end_i=None):
    """Return the slice ``[start_i:end_i]`` of the sequence for ``ac``.

    The full sequence comes from ``get_tx_seq``; with both bounds left as
    ``None`` the whole sequence is returned.
    """
    full_seq = self.get_tx_seq(ac)
    return full_seq[start_i:end_i]

def get_pro_ac_for_tx_ac(self, ac):
    """Return a fixed placeholder protein accession.

    The transcript accession ``ac`` is not consulted; every call yields
    the same value.
    """
    placeholder_pro_ac = 'MOCK'
    return placeholder_pro_ac

#
# internal methods
#
Expand Down

0 comments on commit 4983d11

Please sign in to comment.