Skip to content

Commit

Permalink
fixing off-by-one error at transcript start, problematic intergenic variants, and only one assembly mapper on pretty print.
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasprlic committed Nov 16, 2024
1 parent cdd87a5 commit 9a77892
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 75 deletions.
3 changes: 2 additions & 1 deletion src/hgvs/easy.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@
t_to_g = projector.t_to_g
t_to_p = projector.t_to_p
get_relevant_transcripts = am38.relevant_transcripts
pretty = PrettyPrint(hdp, useColor=True, showLegend=True)
pretty37 = PrettyPrint(hdp, am37, useColor=True, showLegend=True)
pretty38 = PrettyPrint(hdp, am38, useColor=True, showLegend=True)

# <LICENSE>
# Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs)
Expand Down
2 changes: 1 addition & 1 deletion src/hgvs/pretty/console/tx_alig_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def display(self, data: VariantData) -> str:

counter += 1

if not pdata.mapped_pos:
if pdata.mapped_pos is None:
var_str += " "
continue

Expand Down
6 changes: 3 additions & 3 deletions src/hgvs/pretty/console/tx_mapping_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ def display(self, data: VariantData) -> str:
prev_c_pos = c_pos
continue

if (c_pos + 1) % 10 == 0:
if (c_pos) % 10 == 0:
var_str += "|"
prev_c_pos = c_pos
continue

elif (c_pos + 1) % 5 == 0:
elif (c_pos) % 5 == 0:
var_str += "."
prev_c_pos = c_pos
continue
Expand All @@ -69,7 +69,7 @@ def display(self, data: VariantData) -> str:
prev_c_pos = c_pos
continue

elif c_pos == 0:
elif c_pos == 1:
var_str += "|"

var_str += " "
Expand Down
4 changes: 2 additions & 2 deletions src/hgvs/pretty/console/tx_pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ def display(self, data: VariantData) -> str:
if len(var_str) > count:
continue

if (c_pos + 1) % 10 == 0:
if (c_pos) % 10 == 0:
# if pdata.c_interval.start.datum == Datum.CDS_END:
# var_str += "*"
var_str += f"{interval} "
continue

elif c_pos == 0:
elif c_pos == 1:
var_str += f"{interval} "
continue
var_str += " "
Expand Down
29 changes: 12 additions & 17 deletions src/hgvs/pretty/datacompiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,24 +206,19 @@ def data(

if tx_ac:
tx_seq = self.config.hdp.get_seq(tx_ac)
if self.config.default_assembly == "GRCh37":
am = self.config.am37
else:
am = self.config.am38

mapper = am._fetch_AlignmentMapper(

mapper = self.config.assembly_mapper._fetch_AlignmentMapper(
tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method="splign"
)

else:
tx_seq = ""
mapper = None
# print(tx_seq)

# we don't know the protein ac, get it looked up:
pro_ac = None
if var_c_or_n and var_c_or_n.type == "c":
var_p = am.c_to_p(var_c_or_n)
var_p = self.config.assembly_mapper.c_to_p(var_c_or_n)
reference_data = RefTranscriptData(self.config.hdp, tx_ac, pro_ac)
else:
var_p = None
Expand Down Expand Up @@ -277,8 +272,8 @@ def data(
reference_data,
pdata,
cig,
prev_c_pos + 1,
prev_n_pos + 1,
prev_c_pos,
prev_n_pos,
)

exon_nr, feat = self._get_exon_nr(tx_exons, chromosome_pos)
Expand All @@ -292,7 +287,7 @@ def data(
pdata.mapped_pos = prev_mapped_pos
pdata.mapped_pos_offset = mapped_pos_offset
pdata.cigar_ref = cig
pdata.ref = chrom_seq[chromosome_pos]
pdata.ref = chrom_seq[chromosome_pos-1]

if mapper.strand > 0:
prev_c_pos += 1
Expand Down Expand Up @@ -321,12 +316,12 @@ def data(
pdata.n_interval = n_interval
if c_interval is not None:
pdata.c_interval = c_interval
c_pos = int(c_interval.start.base) - 1
c_pos = int(c_interval.start.base)
prev_c_pos = c_pos
else:
prev_c_pos = -1

n_pos = int(n_interval.start.base) - 1
n_pos = int(n_interval.start.base)
prev_n_pos = n_pos

self._populate_with_n_c(
Expand All @@ -351,7 +346,7 @@ def data(
pd = reversed(position_details)
position_details = list(pd)

is_rna = var_c_or_n.ac.startswith("NR_") # not sure how to check this for ENSTs
is_rna = var_c_or_n and var_c_or_n.ac.startswith("NR_") # not sure how to check this for ENSTs

vd = VariantData(
seq_start,
Expand Down Expand Up @@ -421,10 +416,10 @@ def _populate_with_n_c(
n_interval,
c_interval,
):
n_pos = int(n_interval.start.base) - 1
n_pos = int(n_interval.start.base)

if c_interval:
c_pos = int(c_interval.start.base) - 1
c_pos = int(c_interval.start.base)
pdata.c_pos = c_pos
pdata.c_offset = c_interval.start.offset
else:
Expand All @@ -435,7 +430,7 @@ def _populate_with_n_c(

pdata.n_pos = n_pos

pdata.tx = tx_seq[pdata.n_pos]
pdata.tx = tx_seq[pdata.n_pos-1]

coding = True
if var_c_or_n.type == "n": # rna coding can't be in protein space
Expand Down
4 changes: 1 addition & 3 deletions src/hgvs/pretty/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,9 @@ class PrettyConfig:
"""A container for various configurations."""

hdp: Interface
am37: AssemblyMapper
am38: AssemblyMapper
assembly_mapper: AssemblyMapper
padding_left: int = 20
padding_right: int = 20
default_assembly: str = "GRCh37"
useColor: bool = False
showLegend: bool = True
showAllShuffleableRegions = False
Expand Down
50 changes: 17 additions & 33 deletions src/hgvs/pretty/pretty_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class PrettyPrint:
def __init__(
self,
hdp: hgvs.dataproviders.interface.Interface,
default_assembly: str = "GRCh37",
assembly_mapper:AssemblyMapper,
padding_left: int = 20,
padding_right: int = 20,
useColor=False,
Expand All @@ -44,37 +44,24 @@ def __init__(
:param padding: spacing left and right of the variant for display purposes.
"""
am37: AssemblyMapper = AssemblyMapper(hdp, assembly_name="GRCh37", alt_aln_method=alt_aln_method)
am38: AssemblyMapper = AssemblyMapper(hdp, assembly_name="GRCh38", alt_aln_method=alt_aln_method)


self.config = PrettyConfig(
hdp,
am37,
am38,
padding_left,
padding_right,
default_assembly,
useColor,
showLegend,
infer_hgvs_c,
all,
show_reverse_strand,
alt_aln_method,
reverse_display
hdp=hdp,
assembly_mapper=assembly_mapper,
padding_left=padding_left,
padding_right=padding_right,
useColor=useColor,
showLegend=showLegend,
infer_hgvs_c=infer_hgvs_c,
all=all,
show_reverse_strand=show_reverse_strand,
alt_aln_method=alt_aln_method,
reverse_display=reverse_display
)

def _get_assembly_mapper(self) -> AssemblyMapper:
if self.config.default_assembly == "GRCh37":
am = self.config.am37
else:
am = self.config.am38

return am

def _get_all_transcripts(self, var_g) -> List[str]:
am = self._get_assembly_mapper()

transcripts = am.relevant_transcripts(var_g)

transcripts = self.config.assembly_mapper.relevant_transcripts(var_g)

return transcripts

Expand All @@ -87,7 +74,7 @@ def _infer_hgvs_c(self, var_g: SequenceVariant, tx_ac: str = None) -> SequenceVa
else:
return None

am = self._get_assembly_mapper()
am = self.config.assembly_mapper

if tx_ac.startswith("NR_"):
var_n = am.g_to_n(var_g, tx_ac)
Expand All @@ -97,10 +84,7 @@ def _infer_hgvs_c(self, var_g: SequenceVariant, tx_ac: str = None) -> SequenceVa

def _map_to_chrom(self, sv: SequenceVariant) -> SequenceVariant:
"""maps a variant to chromosomal coords, if needed."""
if self.config.default_assembly == "GRCh37":
am = self.config.am37
else:
am = self.config.am38
am = self.config.assembly_mapper

if sv.type == "c":
return am.c_to_g(sv)
Expand Down
3 changes: 2 additions & 1 deletion src/hgvs/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def shell():
normalizer,
parse,
parser,
pretty,
pretty37,
pretty38,
projector,
t_to_g,
t_to_p,
Expand Down
Loading

0 comments on commit 9a77892

Please sign in to comment.