From 015720374d12373ffbec07f49657d571ea5b5fa3 Mon Sep 17 00:00:00 2001
From: Brendan ODonnell
Date: Fri, 2 Feb 2024 14:40:03 -0500
Subject: [PATCH] Rewrite dup as ins and try both shuffle directions

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is commentary only; it is not part of the change itself):

* c_to_p: inside the existing fallback branch (its visible conditions are a
  non-zero start/end offset and var_out.posedit being None; the start of the
  condition lies outside this hunk), the strand-based choice of a single
  shuffle direction is removed. The variant is mapped to g. once with
  c_to_g; then, for shuffle_direction 3 and then 5, it is shifted with
  _far_shift, mapped back onto var_c.ac with the parent class's g_to_c
  (passing alt_aln_method=self.alt_aln_method), and handed to the parent
  class's _c_to_p. The loop stops at the first result whose posedit is not
  None; if neither direction yields one, the result of the direction-5
  attempt is what reaches _maybe_normalize.
* _far_shift (new): builds a Normalizer with validate=False and the
  requested shuffle_direction and normalizes var_g. If the normalized edit
  type is 'dup', it calls self._replace_reference on the variant and then
  replaces the posedit with an insertion (NARefAlt with ref=None and alt
  taken from the dup's ref): between start-1 and start for direction 3,
  otherwise between end and end+1.
  NOTE(review): presumably _replace_reference is what fills in edit.ref for
  the dup, since alt is read from edit.ref right afterwards; confirm.
* tests: the expected protein result for NM_024529.4:c.132-1_132dup changes
  from "p.?" to "p.(Thr45GlyfsTer65)".

 src/hgvs/assemblymapper.py        | 51 +++++++++++++++++++++++++------
 tests/test_hgvs_assemblymapper.py |  2 +-
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/src/hgvs/assemblymapper.py b/src/hgvs/assemblymapper.py
index b73c759e..f65f7c9d 100644
--- a/src/hgvs/assemblymapper.py
+++ b/src/hgvs/assemblymapper.py
@@ -15,6 +15,9 @@
     HGVSUnsupportedOperationError,
 )
 from hgvs.variantmapper import VariantMapper
+from hgvs.posedit import PosEdit
+from hgvs.edit import NARefAlt
+from hgvs.location import SimplePosition, Interval
 
 _logger = logging.getLogger(__name__)
 
@@ -182,17 +185,16 @@ def c_to_p(self, var_c):
             and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0)
             and var_out.posedit is None
         ):
-            if self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand == 1:
-                normalizer = hgvs.normalizer.Normalizer(
-                    self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=5
+            var_g = self.c_to_g(var_c)
+            for shuffle_direction in [3, 5]:
+                shifted_var_g = self._far_shift(var_g, shuffle_direction)
+                shifted_var_c = super(AssemblyMapper, self).g_to_c(
+                    shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method
                 )
-            else:
-                normalizer = hgvs.normalizer.Normalizer(
-                    self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=3
-                )
-            var_g = normalizer.normalize(self.c_to_g(var_c))
-            var_c = self.g_to_c(var_g, var_c.ac)
-            var_out = super(AssemblyMapper, self)._c_to_p(var_c)
+                var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c)
+
+                if var_out.posedit is not None:
+                    break
 
         return self._maybe_normalize(var_out)
 
@@ -310,6 +312,35 @@ def _maybe_normalize(self, var):
                 # fall through to return unnormalized variant
         return var
 
+    def _far_shift(self, var_g, shuffle_direction):
+        """Attempt to shift a variant all the way left or right. Rewrite
+        duplications as insertions so that the change is shifted as far as
+        possible."""
+        normalizer = hgvs.normalizer.Normalizer(
+            self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=shuffle_direction
+        )
+        shifted_var_g = normalizer.normalize(var_g)
+        if shifted_var_g.posedit.edit.type == 'dup':
+            self._replace_reference(shifted_var_g)
+            if shuffle_direction == 3:
+                shifted_var_g.posedit = PosEdit(
+                    pos=Interval(
+                        start=SimplePosition(base=shifted_var_g.posedit.pos.start.base-1),
+                        end=SimplePosition(base=shifted_var_g.posedit.pos.start.base),
+                    ),
+                    edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref)
+                )
+            else:
+                shifted_var_g.posedit = PosEdit(
+                    pos=Interval(
+                        start=SimplePosition(base=shifted_var_g.posedit.pos.end.base),
+                        end=SimplePosition(base=shifted_var_g.posedit.pos.end.base+1),
+                    ),
+                    edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref)
+                )
+
+        return shifted_var_g
+
 
 #
 # Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
diff --git a/tests/test_hgvs_assemblymapper.py b/tests/test_hgvs_assemblymapper.py
index c530a406..2e41e2f9 100644
--- a/tests/test_hgvs_assemblymapper.py
+++ b/tests/test_hgvs_assemblymapper.py
@@ -202,7 +202,7 @@ def test_c_to_p_with_stop_gain(self):
 
     def test_map_of_dup_intron_exon_boundary(self):
         hgvs_c = "NM_024529.4:c.132-1_132dup"
-        hgvs_p = "NP_078805.3:p.?"
+        hgvs_p = "NP_078805.3:p.(Thr45GlyfsTer65)"
 
         var_c = self.hp.parse_hgvs_variant(hgvs_c)
         var_p = self.am.c_to_p(var_c)