Skip to content

Commit

Permalink
add explicit OLS search before querying Zooma
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Jan 22, 2024
1 parent 8693671 commit 6dcd166
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 7 deletions.
13 changes: 10 additions & 3 deletions cmat/trait_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from collections import Counter

from cmat.clinvar_xml_io import ClinVarTrait
from cmat.trait_mapping.ols import get_uri_from_exact_match
from cmat.trait_mapping.output import output_trait
from cmat.trait_mapping.oxo import get_oxo_results
from cmat.trait_mapping.oxo import uris_to_oxo_format
from cmat.trait_mapping.trait import Trait
from cmat.trait_mapping.trait import Trait, OntologyEntry
from cmat.trait_mapping.trait_names_parsing import parse_trait_names
from cmat.trait_mapping.zooma import get_zooma_results

Expand All @@ -32,8 +33,9 @@ def get_uris_for_oxo(zooma_result_list: list) -> set:

def process_trait(trait: Trait, filters: dict, zooma_host: str, oxo_target_list: list, oxo_distance: int, target_ontology: str = 'EFO') -> Trait:
"""
Process a single trait. Find any mappings in Zooma. If there are no high confidence Zooma
mappings that are in EFO then query OxO with any high confidence mappings not in EFO.
Process a single trait. First look for an exact string match in the target ontology and return immediately if found.
Otherwise find any mappings in Zooma. If there are no high confidence Zooma mappings that are in EFO then query OxO
with any high confidence mappings not in EFO.
:param trait: The trait to be processed.
:param filters: A dictionary of filters to use for querying Zooma.
Expand All @@ -47,6 +49,11 @@ def process_trait(trait: Trait, filters: dict, zooma_host: str, oxo_target_list:
"""
logger.debug('Processing trait {}'.format(trait.name))

string_match_uri = get_uri_from_exact_match(trait.name.lower(), target_ontology)
if string_match_uri:
trait.finished_mapping_set.add(OntologyEntry(string_match_uri, trait.name.lower()))
return trait

trait.zooma_result_list = get_zooma_results(trait.name.lower(), filters, zooma_host, target_ontology)
trait.process_zooma_results()
if (trait.is_finished
Expand Down
29 changes: 29 additions & 0 deletions cmat/trait_mapping/ols.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from functools import lru_cache
import logging
import requests
Expand Down Expand Up @@ -121,3 +122,31 @@ def get_replacement_term(uri: str, ontology: str = 'EFO') -> str:
if response_json["term_replaced_by"] is not None:
return response_json["term_replaced_by"]
return ""


@lru_cache(maxsize=16384)
@retry(exceptions=(ConnectionError, requests.RequestException), tries=4, delay=2, backoff=1.2, jitter=(1, 3))
def get_uri_from_exact_match(text, ontology='EFO'):
    """
    Finds URI from target ontology for a given text based on exact string match.

    Queries the OLS search endpoint restricted to the ``label`` field with ``exact=true``, then keeps only those
    results whose label equals ``text`` when both are lower-cased. A URI is returned only if exactly one distinct
    IRI remains; zero or several candidates are treated as "not found" and a warning is logged.

    Results (including None) are memoised by the surrounding ``lru_cache``, and connection/HTTP errors are retried
    by the ``retry`` decorator before being propagated.

    :param text: String to search for
    :param ontology: ID of target ontology to query (default EFO)
    :return: URI of matching term or None if not found
    """
    # Build the endpoint with plain string handling: os.path.join is meant for filesystem paths and would
    # insert backslashes on Windows.
    search_url = f"{OLS_SERVER.rstrip('/')}/api/search"
    # Let requests encode the query string, so trait names containing characters such as '&', '#' or '+'
    # reach the server intact instead of corrupting the query.
    params = {
        'ontology': ontology,
        'q': text,
        'queryFields': 'label',
        'exact': 'true',
    }
    response = requests.get(search_url, params=params)
    response.raise_for_status()
    data = response.json()
    if 'response' in data:
        wanted_label = text.lower()
        # Check that we've found the term exactly (strict case-insensitive string match)
        candidates = {
            result['iri']
            for result in data['response']['docs']
            if result['label'].lower() == wanted_label
        }
        # Only return a result if we can find it unambiguously
        if len(candidates) == 1:
            return candidates.pop()
    logger.warning(f'Could not find an IRI for {text}')
    return None
8 changes: 4 additions & 4 deletions tests/trait_mapping/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def test_main():
def test_process_trait_exact_match():
# Exact match with MONDO:0009061 (in EFO and Mondo)
trait_name = 'Cystic Fibrosis'
# Don't use any data sources in Zooma as those will come back as high-confidence matches
zooma_filters = {'ontologies': 'efo,mondo,hp',
'required': 'none',
'preferred': 'none'}
# Use our default Zooma filters
zooma_filters = {'ontologies': 'efo,ordo,hp,mondo',
'required': 'cttv,eva-clinvar,clinvar-xrefs,gwas',
'preferred': 'eva-clinvar,cttv,gwas,clinvar-xrefs'}
zooma_host = 'https://www.ebi.ac.uk'
# Don't use OxO
oxo_targets = []
Expand Down

0 comments on commit 6dcd166

Please sign in to comment.