From 4633157924479c78282352d473e9572cfc5a3f64 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 24 Jan 2025 09:51:18 -0500 Subject: [PATCH] update coding --- src/metakb/transformers/civic.py | 15 ++-- src/metakb/transformers/moa.py | 6 +- tests/conftest.py | 69 +++++++++++++------ tests/unit/database/test_database.py | 2 +- .../test_civic_transformer_diagnostic.py | 30 +++++--- .../test_moa_transformer_prognostic.py | 5 +- .../test_moa_transformer_therapeutic.py | 4 +- 7 files changed, 92 insertions(+), 39 deletions(-) diff --git a/src/metakb/transformers/civic.py b/src/metakb/transformers/civic.py index 4148c33c..bb460c7b 100644 --- a/src/metakb/transformers/civic.py +++ b/src/metakb/transformers/civic.py @@ -587,6 +587,7 @@ async def _add_variations(self, variants: list[dict]) -> None: # Get variant types variant_types_value = [ Coding( + id=vt["so_id"], code=vt["so_id"], system=f"{vt['url'].rsplit('/', 1)[0]}/", label="_".join(vt["name"].lower().split()), @@ -598,6 +599,7 @@ async def _add_variations(self, variants: list[dict]) -> None: mappings = [ ConceptMapping( coding=Coding( + id=variant_id, code=str(variant["id"]), system="https://civicdb.org/variants/", ), @@ -610,7 +612,7 @@ async def _add_variations(self, variants: list[dict]) -> None: ConceptMapping( coding=Coding( code=variant["allele_registry_id"], - system="https://reg.clinicalgenome.org/", + system="https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", ), relation=Relation.RELATED_MATCH, ) @@ -716,7 +718,8 @@ def _add_genes(self, genes: list[dict]) -> None: mappings=[ ConceptMapping( coding=Coding( - code=f"ncbigene:{gene['entrez_id']}", + id=ncbigene, + code=str(gene["entrez_id"]), system="https://www.ncbi.nlm.nih.gov/gene/", ), relation=Relation.EXACT_MATCH, @@ -777,8 +780,9 @@ def _get_disease(self, disease: dict) -> MappableConcept | None: mappings.append( ConceptMapping( coding=Coding( + id=doid, code=doid, - system="http://purl.obolibrary.org/obo/doid.owl", + system="https://disease-ontology.org/?id=", ), relation=Relation.EXACT_MATCH, ) @@ -877,8 +881,9 @@ def _get_therapy(self, therapy: dict) -> MappableConcept | None: mappings.append( ConceptMapping( coding=Coding( - code=ncit_id, - system="http://purl.obolibrary.org/obo/ncit.owl", + id=ncit_id, + code=ncit_id.split(":")[-1], + system="https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", ), relation=Relation.EXACT_MATCH, ) diff --git a/src/metakb/transformers/moa.py b/src/metakb/transformers/moa.py index 0e4fd260..244e073f 100644 --- a/src/metakb/transformers/moa.py +++ b/src/metakb/transformers/moa.py @@ -291,6 +291,7 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None: mappings = [ ConceptMapping( coding=Coding( + id=moa_variant_id, code=str(variant_id), system="https://moalmanac.org", ), @@ -410,7 +411,7 @@ def _add_documents(self, sources: list) -> None: ConceptMapping( coding=Coding( code=source["nct"], - system="https://clinicaltrials.gov", + system="https://clinicaltrials.gov/search?term=", ), relation=Relation.EXACT_MATCH, ) @@ -601,8 +602,9 @@ def _get_disease(self, disease: dict) -> MappableConcept | None: mappings.append( ConceptMapping( coding=Coding( + id=f"oncotree:{ot_code}", code=ot_code, - system="https://oncotree.mskcc.org", + system="https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=CODE&search=", label=ot_term, ), relation=Relation.EXACT_MATCH, diff --git a/tests/conftest.py b/tests/conftest.py index 3ed97e7f..0757909c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -255,7 +255,7 @@ def civic_mpid33(civic_vid33): { "coding": { "code": "CA126713", - "system": "https://reg.clinicalgenome.org/", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", }, "relation": "relatedMatch", }, @@ -288,7 +288,11 @@ def civic_mpid33(civic_vid33): "relation": "relatedMatch", }, { - "coding": {"code": "33", "system": "https://civicdb.org/variants/"}, + "coding": { + "id": "civic.vid:33", + "code": "33", + "system": "https://civicdb.org/variants/", + }, "relation": "exactMatch", }, ], @@ -316,6 +320,7 @@ def civic_mpid33(civic_vid33): "name": "Variant types", "value": [ { + "id": "SO:0001583", "code": "SO:0001583", "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", @@ -381,7 +386,8 @@ def civic_gid5(): "mappings": [ { "coding": { - "code": "ncbigene:673", + "id": "ncbigene:673", + "code": "673", "system": "https://www.ncbi.nlm.nih.gov/gene/", }, "relation": "exactMatch", @@ -504,7 +510,7 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): { "coding": { "code": "CA123643", - "system": "https://reg.clinicalgenome.org/", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", }, "relation": "relatedMatch", }, @@ -530,7 +536,11 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "relation": "relatedMatch", }, { - "coding": {"code": "12", "system": "https://civicdb.org/variants/"}, + "coding": { + "id": "civic.vid:12", + "code": "12", + "system": "https://civicdb.org/variants/", + }, "relation": "exactMatch", }, ], @@ -558,6 +568,7 @@ def civic_mpid12(civic_vid12, braf_v600e_genomic): "name": "Variant types", "value": [ { + "id": "SO:0001583", "code": "SO:0001583", "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", @@ -605,7 +616,8 @@ def civic_gid19(): "mappings": [ { "coding": { - "code": "ncbigene:1956", + "id": "ncbigene:1956", + "code": "1956", "system": "https://www.ncbi.nlm.nih.gov/gene/", }, "relation": "exactMatch", @@ -647,8 +659,9 @@ def civic_tid146(): "mappings": [ { "coding": { - "code": "ncit:C66940", - "system": "http://purl.obolibrary.org/obo/ncit.owl", + "id": "ncit:C66940", + "code": "C66940", + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, "relation": "exactMatch", } @@ -710,8 +723,9 @@ def civic_did8(): "mappings": [ { "coding": { + "id": "DOID:3908", "code": "DOID:3908", - "system": "http://purl.obolibrary.org/obo/doid.owl", + "system": "https://disease-ontology.org/?id=", }, "relation": "exactMatch", } @@ -750,8 +764,9 @@ def civic_tid28(): "mappings": [ { "coding": { - "code": "ncit:C1857", - "system": "http://purl.obolibrary.org/obo/ncit.owl", + "id": "ncit:C1857", + "code": "C1857", + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, "relation": "exactMatch", } @@ -829,8 +844,9 @@ def civic_tid16(cetuximab_extensions): "mappings": [ { "coding": { - "code": "ncit:C1723", - "system": "http://purl.obolibrary.org/obo/ncit.owl", + "id": "ncit:C1723", + "code": "C1723", + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, "relation": "exactMatch", } @@ -880,8 +896,9 @@ def civic_tid483(encorafenib_extensions): "mappings": [ { "coding": { - "code": "ncit:C98283", - "system": "http://purl.obolibrary.org/obo/ncit.owl", + "id": "ncit:C98283", + "code": "C98283", + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, "relation": "exactMatch", } @@ -919,8 +936,9 @@ def civic_did11(): "mappings": [ { "coding": { + "id": "DOID:9256", "code": "DOID:9256", - "system": "http://purl.obolibrary.org/obo/doid.owl", + "system": "https://disease-ontology.org/?id=", }, "relation": "exactMatch", } @@ -1217,7 +1235,7 @@ def civic_mpid65(civic_vid65): { "coding": { "code": "CA123513", - "system": "https://reg.clinicalgenome.org/", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", }, "relation": "relatedMatch", }, @@ -1236,7 +1254,11 @@ def civic_mpid65(civic_vid65): "relation": "relatedMatch", }, { - "coding": {"code": "65", "system": "https://civicdb.org/variants/"}, + "coding": { + "id": "civic.vid:65", + "code": "65", + "system": "https://civicdb.org/variants/", + }, "relation": "exactMatch", }, ], @@ -1264,6 +1286,7 @@ def civic_mpid65(civic_vid65): "name": "Variant types", "value": [ { + "id": "SO:0001583", "code": "SO:0001583", "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", @@ -1294,7 +1317,8 @@ def civic_did3(): "mappings": [ { "coding": { - "system": "http://purl.obolibrary.org/obo/doid.owl", + "id": "DOID:9119", + "system": "https://disease-ontology.org/?id=", "code": "DOID:9119", }, "relation": "exactMatch", @@ -1328,7 +1352,8 @@ def civic_gid29(): { "coding": { "system": "https://www.ncbi.nlm.nih.gov/gene/", - "code": "ncbigene:3815", + "id": "ncbigene:3815", + "code": "3815", }, "relation": "exactMatch", } @@ -1472,6 +1497,7 @@ def moa_vid66(): "mappings": [ { "coding": { + "id": "moa.variant:66", "system": "https://moalmanac.org", "code": "66", }, @@ -1654,8 +1680,9 @@ def moa_chronic_myelogenous_leukemia(): "mappings": [ { "coding": { + "id": "oncotree:CML", "label": "Chronic Myelogenous Leukemia", - "system": "https://oncotree.mskcc.org", + "system": "https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=CODE&search=", "code": "CML", }, "relation": "exactMatch", diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index 362afffb..2f9bc77d 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -368,7 +368,7 @@ def test_categorical_variant_rules( variant_types = json.loads(cv["variant_types"]) for vt in variant_types: - assert set(vt.keys()) == {"label", "system", "code"} + assert set(vt.keys()) == {"id", "label", "system", "code"} def test_location_rules( diff --git a/tests/unit/transformers/test_civic_transformer_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py index 9a788190..dfdf7cc3 100644 --- a/tests/unit/transformers/test_civic_transformer_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -110,7 +110,7 @@ def civic_mpid99(): { "coding": { "code": "CA123194", - "system": "https://reg.clinicalgenome.org/", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", }, "relation": "relatedMatch", }, @@ -129,7 +129,11 @@ def civic_mpid99(): "relation": "relatedMatch", }, { - "coding": {"code": "99", "system": "https://civicdb.org/variants/"}, + "coding": { + "id": "civic.vid:99", + "code": "99", + "system": "https://civicdb.org/variants/", + }, "relation": "exactMatch", }, ], @@ -157,6 +161,7 @@ def civic_mpid99(): "name": "Variant types", "value": [ { + "id": "SO:0001583", "code": "SO:0001583", "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", @@ -177,7 +182,8 @@ def civic_gid38(): "mappings": [ { "coding": { - "code": "ncbigene:5156", + "id": "ncbigene:5156", + "code": "5156", "system": "https://www.ncbi.nlm.nih.gov/gene/", }, "relation": "exactMatch", @@ -210,8 +216,9 @@ def civic_did2(): "mappings": [ { "coding": { + "id": "DOID:9253", "code": "DOID:9253", - "system": "http://purl.obolibrary.org/obo/doid.owl", + "system": "https://disease-ontology.org/?id=", }, "relation": "exactMatch", } @@ -345,7 +352,7 @@ def civic_mpid113(): { "coding": { "code": "CA009082", - "system": "https://reg.clinicalgenome.org/", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", }, "relation": "relatedMatch", }, @@ -364,7 +371,11 @@ def civic_mpid113(): "relation": "relatedMatch", }, { - "coding": {"code": "113", "system": "https://civicdb.org/variants/"}, + "coding": { + "id": "civic.vid:113", + "code": "113", + "system": "https://civicdb.org/variants/", + }, "relation": "exactMatch", }, ], @@ -392,6 +403,7 @@ def civic_mpid113(): "name": "Variant types", "value": [ { + "id": "SO:0001583", "code": "SO:0001583", "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", @@ -412,7 +424,8 @@ def civic_gid42(): "mappings": [ { "coding": { - "code": "ncbigene:5979", + "id": "ncbigene:5979", + "code": "5979", "system": "https://www.ncbi.nlm.nih.gov/gene/", }, "relation": "exactMatch", @@ -455,8 +468,9 @@ def civic_did15(): "mappings": [ { "coding": { + "id": "DOID:3973", "code": "DOID:3973", - "system": "http://purl.obolibrary.org/obo/doid.owl", + "system": "https://disease-ontology.org/?id=", }, "relation": "exactMatch", } diff --git a/tests/unit/transformers/test_moa_transformer_prognostic.py b/tests/unit/transformers/test_moa_transformer_prognostic.py index 804f8d1a..65d77914 100644 --- a/tests/unit/transformers/test_moa_transformer_prognostic.py +++ b/tests/unit/transformers/test_moa_transformer_prognostic.py @@ -98,6 +98,7 @@ def moa_vid141(): "mappings": [ { "coding": { + "id": "moa.variant:141", "system": "https://moalmanac.org", "code": "141", }, @@ -128,8 +129,9 @@ def moa_myelodysplasia(): { "coding": { "label": "Myelodysplasia", - "system": "https://oncotree.mskcc.org", + "system": "https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=CODE&search=", "code": "MDS", + "id": "oncotree:MDS", }, "relation": "exactMatch", } @@ -263,6 +265,7 @@ def moa_vid532(): "mappings": [ { "coding": { + "id": "moa.variant:532", "system": "https://moalmanac.org", "code": "532", }, diff --git a/tests/unit/transformers/test_moa_transformer_therapeutic.py b/tests/unit/transformers/test_moa_transformer_therapeutic.py index 44857f0a..a7fe5dbc 100644 --- a/tests/unit/transformers/test_moa_transformer_therapeutic.py +++ b/tests/unit/transformers/test_moa_transformer_therapeutic.py @@ -81,6 +81,7 @@ def moa_vid144(braf_v600e_genomic): "mappings": [ { "coding": { + "id": "moa.variant:144", "system": "https://moalmanac.org", "code": "144", }, @@ -164,8 +165,9 @@ def moa_aid154_study_stmt(moa_vid144, moa_cetuximab, moa_encorafenib, moa_method { "coding": { "label": "Colorectal Adenocarcinoma", - "system": "https://oncotree.mskcc.org", + "system": "https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=CODE&search=", "code": "COADREAD", + "id": "oncotree:COADREAD", }, "relation": "exactMatch", }