Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve automated Zooma mappings #411

Merged
merged 4 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmat/trait_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def process_trait(trait: Trait, filters: dict, zooma_host: str, oxo_target_list:
"""
logger.debug('Processing trait {}'.format(trait.name))

trait.zooma_result_list = get_zooma_results(trait.name, filters, zooma_host, target_ontology)
trait.zooma_result_list = get_zooma_results(trait.name.lower(), filters, zooma_host, target_ontology)
trait.process_zooma_results()
if (trait.is_finished
or len(trait.zooma_result_list) == 0
Expand Down
11 changes: 5 additions & 6 deletions cmat/trait_mapping/trait.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,11 @@ def process_zooma_results(self):
Check whether any Zooma mappings can be output as a finished ontology mapping.
Put any finished mappings in finished_mapping_set
"""
for mapping in self.zooma_result_list:
if mapping.confidence.lower() != "high":
continue

for mapping in mapping.mapping_list:
if mapping.in_ontology and mapping.is_current:
for zooma_result in self.zooma_result_list:
for mapping in zooma_result.mapping_list:
# Accept either high-confidence mappings, or exact string matches
apriltuesday marked this conversation as resolved.
Show resolved Hide resolved
if mapping.in_ontology and mapping.is_current and (zooma_result.confidence.lower() == "high"
or zooma_result.zooma_label.lower() == self.name.lower()):
ontology_entry = OntologyEntry(mapping.uri, mapping.ontology_label)
self.finished_mapping_set.add(ontology_entry)

Expand Down
3 changes: 2 additions & 1 deletion tests/pipelines/resources/expected/consequences_snp.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@
17:65558594:G:A ENSG00000168646 AXIN2 synonymous_variant
17:6556374:C:T ENSG00000091622 PITPNM3 intron_variant
17:6628328:A:G ENSG00000198920 KIAA0753 synonymous_variant
17:6628328:A:G ENSG00000282936 3_prime_UTR_variant
17:6693178:A:AACACACACACAC ENSG00000141485 SLC13A5 splice_polypyrimidine_tract_variant
17:7224245:T:C ENSG00000072778 ACADVL splice_donor_variant
17:73201307:A:G ENSG00000166685 COG1 missense_variant
Expand Down Expand Up @@ -900,7 +901,7 @@
4:120785105:A:G ENSG00000138738 PRDM5 splice_polypyrimidine_tract_variant
4:121847458:G:T ENSG00000138686 BBS7 missense_variant
4:121853086:C:T ENSG00000138686 BBS7 missense_variant
4:122979387:G:T ENSG00000145375 SPATA5 splice_donor_variant
4:122979387:G:T ENSG00000145375 AFG2A splice_donor_variant
4:127881908:G:T ENSG00000142731 PLK4 missense_variant
4:127930737:G:T ENSG00000164073 MFSD8 missense_variant
4:128959797:C:A ENSG00000151466 SCLT1 intron_variant
Expand Down
1 change: 1 addition & 0 deletions tests/pipelines/resources/expected/evidence_strings.json
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["uncertain significance"], "confidence": "criteria provided, single submitter", "studyId": "RCV001373139", "releaseDate": "2021-04-13", "targetFromSourceId": "ENSG00000163930", "variantFunctionalConsequenceId": "SO_0001583", "variantId": "3_52402628_G_A", "cohortPhenotypes": ["BAP1 tumor predisposition syndrome", "BAP1-related tumor predisposition syndrome", "Tumor predisposition syndrome", "Tumor susceptibility linked to germline BAP1 mutations"], "diseaseFromSource": "BAP1-related tumor predisposition syndrome", "diseaseFromSourceId": "C3280492", "diseaseFromSourceMappedId": "MONDO_0013692", "variantHgvsId": "NC_000003.12:g.52402628G>A"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["uncertain significance"], "confidence": "criteria provided, single submitter", "studyId": "RCV001373139", "releaseDate": "2021-04-13", "targetFromSourceId": "ENSG00000163930", "variantFunctionalConsequenceId": "SO_0001583", "variantId": "3_52402628_G_A", "cohortPhenotypes": ["BAP1 tumor predisposition syndrome", "BAP1-related tumor predisposition syndrome", "Tumor predisposition syndrome", "Tumor susceptibility linked to germline BAP1 mutations"], "diseaseFromSource": "BAP1-related tumor predisposition syndrome", "diseaseFromSourceId": "C3280492", "diseaseFromSourceMappedId": "Orphanet_289539", "variantHgvsId": "NC_000003.12:g.52402628G>A"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["benign"], "confidence": "criteria provided, single submitter", "studyId": "RCV001730858", "releaseDate": "2022-02-20", "targetFromSourceId": "ENSG00000198920", "variantFunctionalConsequenceId": "SO_0001819", "variantId": "17_6628328_A_G", "cohortPhenotypes": ["Joubert syndrome 38"], "diseaseFromSource": "Joubert syndrome 38", "diseaseFromSourceId": "C5561958", "variantHgvsId": "NC_000017.11:g.6628328A>G"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["benign"], "confidence": "criteria provided, single submitter", "studyId": "RCV001730858", "releaseDate": "2022-02-20", "targetFromSourceId": "ENSG00000282936", "variantFunctionalConsequenceId": "SO_0001624", "variantId": "17_6628328_A_G", "cohortPhenotypes": ["Joubert syndrome 38"], "diseaseFromSource": "Joubert syndrome 38", "diseaseFromSourceId": "C5561958", "variantHgvsId": "NC_000017.11:g.6628328A>G"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["likely benign"], "confidence": "criteria provided, single submitter", "studyId": "RCV002057147", "releaseDate": "2022-06-09", "targetFromSourceId": "ENSG00000115904", "variantFunctionalConsequenceId": "SO_0001627", "variantId": "2_39120324_C_A", "variantRsId": "rs368569135", "cohortPhenotypes": ["Noonan spectrum disorder", "RASopathy", "rasopathies"], "diseaseFromSource": "RASopathy", "diseaseFromSourceId": "C5555857", "diseaseFromSourceMappedId": "EFO_1001502", "variantHgvsId": "NC_000002.12:g.39120324C>A"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["pathogenic"], "confidence": "criteria provided, single submitter", "studyId": "RCV002247286", "releaseDate": "2022-06-09", "targetFromSourceId": "ENSG00000198712", "variantFunctionalConsequenceId": "SO_0001631", "variantId": "MT_7512_T_C", "variantRsId": "rs199474817", "cohortPhenotypes": ["COX deficiency", "Complex 4 mitochondrial respiratory chain deficiency", "Complex IV deficiency", "Cytochrome-c oxidase deficiency", "Cytochrome-c oxidase deficiency disease", "Deficiency of mitochondrial respiratory chain complex4", "MITOCHONDRIAL COMPLEX IV DEFICIENCY, NUCLEAR TYPE 1", "Mitochondrial complex IV deficiency"], "diseaseFromSource": "Cytochrome-c oxidase deficiency disease", "diseaseFromSourceId": "C5435656", "diseaseFromSourceMappedId": "MONDO_0009068", "variantHgvsId": "NC_012920.1:m.7512T>C"}
{"alleleOrigins": ["germline"], "datasourceId": "eva", "datatypeId": "genetic_association", "clinicalSignificances": ["pathogenic"], "confidence": "criteria provided, single submitter", "studyId": "RCV002247286", "releaseDate": "2022-06-09", "targetFromSourceId": "ENSG00000198786", "variantFunctionalConsequenceId": "SO_0001631", "variantId": "MT_7512_T_C", "variantRsId": "rs199474817", "cohortPhenotypes": ["COX deficiency", "Complex 4 mitochondrial respiratory chain deficiency", "Complex IV deficiency", "Cytochrome-c oxidase deficiency", "Cytochrome-c oxidase deficiency disease", "Deficiency of mitochondrial respiratory chain complex4", "MITOCHONDRIAL COMPLEX IV DEFICIENCY, NUCLEAR TYPE 1", "Mitochondrial complex IV deficiency"], "diseaseFromSource": "Cytochrome-c oxidase deficiency disease", "diseaseFromSourceId": "C5435656", "diseaseFromSourceMappedId": "MONDO_0009068", "variantHgvsId": "NC_012920.1:m.7512T>C"}
Expand Down
27 changes: 26 additions & 1 deletion tests/trait_mapping/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import pytest

from cmat.trait_mapping.main import parse_traits, process_traits
from cmat.trait_mapping.main import parse_traits, process_traits, process_trait
from cmat.trait_mapping.trait import Trait


def get_test_resource(resource_name):
Expand Down Expand Up @@ -62,3 +63,27 @@ def test_main():
mapped_terms = {x[0] for x in output_mappings}
curation_terms = {x[0] for x in output_curation}
assert len(mapped_terms) + len(curation_terms) == len(all_terms)


def test_process_trait_exact_match():
    """Check that an exact string match only finishes a trait when the term is in the target ontology.

    The trait name used here is an exact match with MONDO:0009061 (in EFO and Mondo).
    """
    # No Zooma data sources are requested, as those would come back as high-confidence matches
    filters = {'ontologies': 'efo,mondo,hp',
               'required': 'none',
               'preferred': 'none'}
    # OxO is not used: no target ontologies and a distance of zero
    no_oxo_targets = []

    def run_with_target(ontology):
        # A fresh Trait is built for every run so results from one target ontology cannot leak into the next
        return process_trait(Trait('Cystic Fibrosis', None, None), filters, 'https://www.ebi.ac.uk',
                             no_oxo_targets, 0, target_ontology=ontology)

    # Exact string match with a term contained in the target ontology: should be marked as finished
    assert run_with_target('efo').is_finished

    # Zooma finds an exact match in one of its ontologies, but not in the requested target ontology,
    # so the trait still needs to be curated and should not be marked as finished
    assert not run_with_target('hp').is_finished
Loading