Skip to content

Commit

Permalink
Downgrade duplicate variant error to a warning and skip duplicates.
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Oct 13, 2023
1 parent a25f1f1 commit 55aa96c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CHROM POS REF ALT SYMBOL
chr17 7675088 C T TP53
14 changes: 14 additions & 0 deletions tests/test_vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,17 @@ def test_vcf_with_multiple_transcripts_and_no_pick(self):
vep_annotation_reporter.main(command)
self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.merge_multiple_transcripts.tsv'), os.path.join(temp_path.name, 'input.tsv')))
temp_path.cleanup()

def test_vcf_with_duplicate_variant(self):
    """Check that a duplicated variant is written once and logged as a warning.

    Runs the reporter on ``input.duplicate_variant.vcf.gz`` requesting the
    ``SYMBOL`` field, compares the generated TSV against
    ``output.duplicate_variant.tsv``, and verifies that the duplicate-entry
    warning was recorded on the root logger.
    """
    # Undo any earlier logging.disable() call so the warning can be captured.
    # NOTE(review): presumably logging is disabled in the test setup -- confirm.
    logging.disable(logging.NOTSET)
    # Using TemporaryDirectory as a context manager removes the directory
    # even when an assertion below fails; a trailing manual cleanup() call
    # would be skipped in that case.
    with LogCapture() as captured, tempfile.TemporaryDirectory() as work_dir:
        input_vcf = os.path.join(work_dir, 'input.vcf.gz')
        os.symlink(os.path.join(self.test_data_dir, 'input.duplicate_variant.vcf.gz'), input_vcf)
        command = [
            input_vcf,
            'SYMBOL',
        ]
        vep_annotation_reporter.main(command)
        # No output path is passed; the test expects the result at
        # input.tsv in the same directory as the input VCF.
        expected_tsv = os.path.join(self.test_data_dir, 'output.duplicate_variant.tsv')
        actual_tsv = os.path.join(work_dir, 'input.tsv')
        self.assertTrue(cmp(expected_tsv, actual_tsv))
        captured.check_present(('root', 'WARNING', "VEP entry at CHR chr17, POS 7675088, REF C , ALT T already exists. Skipping subsequent entries."))
8 changes: 6 additions & 2 deletions vatools/vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import tempfile
import csv
import binascii
import logging

def define_parser():
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -146,7 +147,7 @@ def extract_vep_fields(args):
else:
vep[chr][pos][ref][alt] = None
else:
sys.exit("VEP entry for at CHR %s, POS %s, REF %s , ALT % already exists" % (chr, pos, ref, alt) )
logging.warning("VEP entry at CHR %s, POS %s, REF %s , ALT %s already exists. Skipping subsequent entries." % (chr, pos, ref, alt) )
vcf_reader.close()
return vep

Expand Down Expand Up @@ -192,6 +193,7 @@ def main(args_input = sys.argv[1:]):
with open(output_file, 'w') as output_filehandle:
writer = csv.DictWriter(output_filehandle, fieldnames = ['CHROM', 'POS', 'REF', 'ALT'] + args.vep_fields, delimiter = "\t")
writer.writeheader()
rows = []
for variant in vcf_reader:
row = {
'CHROM': str(variant.CHROM),
Expand All @@ -200,7 +202,9 @@ def main(args_input = sys.argv[1:]):
'ALT' : ','.join(map(lambda a: a.serialize(), variant.ALT)),
}
row = add_vep_fields_to_row(args, row, vep)
writer.writerow(row)
if row not in rows:
rows.append(row)
writer.writerows(rows)

# Script entry point: run main() only when this module is executed directly.
if __name__ == '__main__':
main()

0 comments on commit 55aa96c

Please sign in to comment.