Skip to content

Commit

Permalink
Merge pull request #70 from griffithlab/issue_36
Browse files Browse the repository at this point in the history
Output warning when VCF is annotated with the PICK flag but no transcript was PICK'ed
  • Loading branch information
susannasiebert authored Oct 16, 2023
2 parents 63ceb20 + 2b5a1f0 commit cff9982
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 3 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CHROM POS REF ALT SYMBOL
chr17 7675088 C T TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53
14 changes: 14 additions & 0 deletions tests/test_vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,17 @@ def test_vcf_with_multiple_transcripts_and_no_pick(self):
vep_annotation_reporter.main(command)
self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.merge_multiple_transcripts.tsv'), os.path.join(temp_path.name, 'input.tsv')))
temp_path.cleanup()

def test_vcf_with_multiple_transcripts_and_pick_set_for_none(self):
    """Check the PICK-annotated-but-nothing-picked case.

    Runs the reporter on a VCF whose transcripts carry the PICK field but
    none of them is picked, then verifies both the merged TSV output and
    the warning that is logged for the affected variant.
    """
    # Clear any logging.disable() threshold so the warning can be captured.
    logging.disable(logging.NOTSET)
    with LogCapture() as captured:
        # The context manager removes the directory even when the run or
        # the assertion below fails, so no temp files are left behind.
        with tempfile.TemporaryDirectory() as temp_dir:
            input_vcf = os.path.join(temp_dir, 'input.vcf.gz')
            os.symlink(os.path.join(self.test_data_dir, 'input.no_pick_value.vcf.gz'), input_vcf)
            command = [
                input_vcf,
                'SYMBOL',
            ]
            vep_annotation_reporter.main(command)
            # The report is expected as input.tsv next to the input VCF.
            self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.no_pick_value.tsv'), os.path.join(temp_dir, 'input.tsv')))
        captured.check_present(('root', 'WARNING', "VCF is annotated with the PICK flag but no PICK'ed transcript found for variant chr17 7675088 C T. Writing values for all transcripts."))
15 changes: 12 additions & 3 deletions vatools/vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import tempfile
import csv
import binascii
import logging

def define_parser():
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -99,13 +100,18 @@ def resolve_alleles(entry, csq_alleles):
return alleles

def transcript_for_alt(transcripts, alt):
    """Choose the annotation values to report for one alternate allele.

    Args:
        transcripts: Mapping from allele to a non-empty list of transcript
            dicts (field name -> string value).
        alt: Allele key to look up in ``transcripts``.

    Returns:
        A ``(values, no_pick_value)`` tuple. ``values`` is the first
        transcript whose ``PICK`` field equals ``'1'``; if there is none,
        it is a dict that joins every transcript's value for each field
        with commas. ``no_pick_value`` is True only when the transcripts
        carry a ``PICK`` field but none of them was picked, so the caller
        can warn about it.

    Raises:
        KeyError: If ``alt`` is not in ``transcripts`` or a transcript
            lacks a field present in the first transcript.
        IndexError: If the transcript list for ``alt`` is empty.
    """
    candidates = transcripts[alt]

    for transcript in candidates:
        if transcript.get('PICK') == '1':
            return transcript, False

    # A PICK key on the first transcript is taken to mean the annotation
    # includes the PICK field, yet no transcript was flagged as picked.
    no_pick_value = 'PICK' in candidates[0]

    # Fall back to reporting every transcript, one comma-joined string per
    # field, using the first transcript's fields as the field list.
    merged_transcripts = {
        key: ",".join(transcript[key] for transcript in candidates)
        for key in candidates[0]
    }
    return merged_transcripts, no_pick_value

def decode_hex(match_string):
hex_string = match_string.group(0).replace('%', '')
Expand Down Expand Up @@ -142,7 +148,10 @@ def extract_vep_fields(args):
alt = alt.serialize()
if alt not in vep[chr][pos][ref]:
if alleles_dict[alt] in transcripts:
vep[chr][pos][ref][alt] = transcript_for_alt(transcripts, alleles_dict[alt])
values, no_pick_value = transcript_for_alt(transcripts, alleles_dict[alt])
if no_pick_value:
logging.warning("VCF is annotated with the PICK flag but no PICK'ed transcript found for variant {} {} {} {}. Writing values for all transcripts.".format(chr, pos, ref, alt))
vep[chr][pos][ref][alt] = values
else:
vep[chr][pos][ref][alt] = None
else:
Expand Down

0 comments on commit cff9982

Please sign in to comment.