Skip to content

Commit

Permalink
Merge pull request #419 from apriltuesday/feb-hotfix
Browse files Browse the repository at this point in the history
Modify counts for when evidence is invalid
  • Loading branch information
apriltuesday authored Feb 7, 2024
2 parents f816f60 + be2dd91 commit 0bfba70
Showing 1 changed file with 27 additions and 8 deletions.
35 changes: 27 additions & 8 deletions cmat/output_generation/clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(self, trait_mappings, consequence_mappings):
self.clinvar_skip_unsupported_variation = 0
self.clinvar_skip_no_functional_consequences = 0
self.clinvar_skip_missing_efo_mapping = 0
self.clinvar_skip_invalid_evidence_string = 0
self.clinvar_done_one_complete_evidence_string = 0
self.clinvar_done_multiple_complete_evidence_strings = 0

Expand All @@ -57,17 +58,16 @@ def __init__(self, trait_mappings, consequence_mappings):
self.repeat_expansion_variants = 0
self.structural_variants = 0

def collate_report(self):
def print_report_and_check_counts(self):
"""Print report of counts and return True if counts are consistent, False otherwise."""
# ClinVar tallies.
clinvar_fatal = self.clinvar_fatal_no_valid_traits
clinvar_skipped = (self.clinvar_skip_unsupported_variation + self.clinvar_skip_no_functional_consequences +
self.clinvar_skip_missing_efo_mapping)
self.clinvar_skip_missing_efo_mapping + self.clinvar_skip_invalid_evidence_string)
clinvar_done = (self.clinvar_done_one_complete_evidence_string +
self.clinvar_done_multiple_complete_evidence_strings)
assert clinvar_fatal + clinvar_skipped + clinvar_done == self.clinvar_total, \
'ClinVar evidence string tallies do not add up to the total amount.'

return f'''Total number of evidence strings generated\t{self.evidence_string_count}
report = f'''Total number of evidence strings generated\t{self.evidence_string_count}
Total number of complete evidence strings generated\t{self.complete_evidence_string_count}
Total number of ClinVar records\t{self.clinvar_total}
Expand All @@ -76,6 +76,7 @@ def collate_report(self):
Unsupported variation type\t{self.clinvar_skip_unsupported_variation}
No functional consequences\t{self.clinvar_skip_no_functional_consequences}
Missing EFO mapping\t{self.clinvar_skip_missing_efo_mapping}
Invalid evidence string\t{self.clinvar_skip_invalid_evidence_string}
Done: Generated at least one complete evidence string\t{clinvar_done}
One complete evidence string\t{self.clinvar_done_one_complete_evidence_string}
Multiple complete evidence strings\t{self.clinvar_done_multiple_complete_evidence_strings}
Expand All @@ -91,6 +92,15 @@ def collate_report(self):
Total number of variant to consequence mappings\t{self.total_consequence_mappings}
Number of repeat expansion variants\t{self.repeat_expansion_variants}
Number of structural variants \t{self.structural_variants}'''.replace('\n' + ' ' * 12, '\n')
print(report)

# Confirm counts as expected, exit with error if not.
expected_total = clinvar_fatal + clinvar_skipped + clinvar_done
if expected_total != self.clinvar_total:
logger.error(f'ClinVar evidence string tallies do not add up to the total amount: '
f'fatal + skipped + done = {expected_total}, total = {self.clinvar_total}')
return False
return True

def write_unmapped_terms(self, dir_out):
with open(os.path.join(dir_out, UNMAPPED_TRAITS_FILE_NAME), 'w') as unmapped_traits_file:
Expand All @@ -117,18 +127,21 @@ def launch_pipeline(clinvar_xml_file, efo_mapping_file, gene_mapping_file, ot_sc
string_to_efo_mappings, _ = load_ontology_mapping(efo_mapping_file)
variant_to_gene_mappings = CT.process_consequence_type_file(gene_mapping_file)

report = clinvar_to_evidence_strings(
report, exception_raised = clinvar_to_evidence_strings(
string_to_efo_mappings, variant_to_gene_mappings, clinvar_xml_file, ot_schema_file,
output_evidence_strings=os.path.join(dir_out, EVIDENCE_STRINGS_FILE_NAME))
print(report.collate_report())
counts_consistent = report.print_report_and_check_counts()
report.write_unmapped_terms(dir_out)
if exception_raised or not counts_consistent:
sys.exit(1)


def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings, clinvar_xml, ot_schema,
output_evidence_strings):
report = Report(trait_mappings=string_to_efo_mappings, consequence_mappings=variant_to_gene_mappings)
ot_schema_contents = json.loads(open(ot_schema).read())
output_evidence_strings_file = open(output_evidence_strings, 'wt')
exception_raised = False

logger.info('Processing ClinVar records')
for clinvar_record in ClinVarDataset(clinvar_xml):
Expand Down Expand Up @@ -202,16 +215,22 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
elif complete_evidence_strings_generated > 1:
report.clinvar_done_multiple_complete_evidence_strings += 1

if evidence_strings_generated == 0:
report.clinvar_skip_invalid_evidence_string += 1

report.complete_evidence_string_count += complete_evidence_strings_generated
report.evidence_string_count += evidence_strings_generated

except Exception as e:
# We catch exceptions but record when one is thrown, so that the pipeline will crash after processing all
# records and printing the report.
logger.error(f'Problem generating evidence for {clinvar_record.accession}')
logger.error(f'Error: {e}')
exception_raised = True
continue

output_evidence_strings_file.close()
return report
return report, exception_raised


def format_creation_date(s):
Expand Down

0 comments on commit 0bfba70

Please sign in to comment.