Skip to content

Commit

Permalink
Merge branch 'issue-417' into issue-426
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Feb 6, 2025
2 parents 44cbfb0 + ea833b2 commit 9d96296
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 71 deletions.
80 changes: 43 additions & 37 deletions src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
from pathlib import Path
from typing import ClassVar, TypeVar

from disease.schemas import (
SYSTEM_URI_TO_NAMESPACE as DISEASE_SYSTEM_URI_TO_NAMESPACE,
)
from disease.schemas import (
NamespacePrefix as DiseaseNamespacePrefix,
)
Expand All @@ -35,7 +32,12 @@
)
from ga4gh.va_spec.base import Document, Method, TherapyGroup
from ga4gh.vrs.models import Allele
from gene.schemas import NormalizeService as NormalizedGene
from gene.schemas import (
NamespacePrefix as GeneNamespacePrefix,
)
from gene.schemas import (
NormalizeService as NormalizedGene,
)
from pydantic import BaseModel, Field, StrictStr, ValidationError
from therapy.schemas import NormalizationService as NormalizedTherapy

Expand Down Expand Up @@ -557,20 +559,24 @@ def _get_vicc_normalizer_mappings(
:return: List of VICC Normalizer data represented as mappable concept
"""

def _add_merged_id_ext(
def _update_mapping(
mapping: ConceptMapping,
is_priority: bool,
label: str | None = None,
normalized_id: str,
normalizer_label: str,
) -> Extension:
"""Update ``mapping`` to include extension on whether mapping is from merged identifier
"""Update ``mapping`` to include extension on whether ``mapping`` contains
code that matches the merged record's primary identifier.
:param mapping: ConceptMapping from vicc normalizer. This will be mutated.
:param is_priority: ``True`` if concept mapping contains primaryCode that
matches merged record primaryCode. ``False`` otherwise (meaning it comes
from merged record mappings)
:param label: Merged concept label, if found
:return: ConceptMapping with normalizer extension added
Extensions will be added. Label will be added if mapping identifier
matches normalized merged identifier.
:param normalized_id: Concept ID from normalized record
:param normalizer_label: Label from normalized record
:return: ConceptMapping with normalizer extension added as well as label (
if mapping id matches normalized merged id)
"""
is_priority = normalized_id == mapping.coding.code.root

merged_id_ext = Extension(
name=NormalizerExtensionName.PRIORITY.value, value=is_priority
)
Expand All @@ -579,40 +585,40 @@ def _add_merged_id_ext(
else:
mapping.extensions = [merged_id_ext]

if label:
mapping.coding.label = label
if is_priority:
mapping.coding.label = normalizer_label

return mapping

mappings: list[ConceptMapping] = []
attr_name = NORMALIZER_INSTANCE_TO_ATTR[type(normalizer_resp)]
normalizer_resp_obj = getattr(normalizer_resp, attr_name)
normalizer_label = normalizer_resp_obj.label
is_disease = isinstance(normalizer_resp, NormalizedDisease)
is_gene = isinstance(normalizer_resp, NormalizedGene)

normalizer_mappings = normalizer_resp_obj.mappings or []
if isinstance(normalizer_resp, NormalizedDisease):
for mapping in normalizer_mappings:
for mapping in normalizer_mappings:
if normalized_id == mapping.coding.code.root:
mappings.append(
_update_mapping(mapping, normalized_id, normalizer_label)
)
else:
mapping_code_lower = mapping.coding.code.root.lower()
if (
DISEASE_SYSTEM_URI_TO_NAMESPACE.get(mapping.coding.system)
== DiseaseNamespacePrefix.MONDO.value
is_disease
and mapping_code_lower.startswith(
DiseaseNamespacePrefix.MONDO.value
)
) or (
is_gene
and mapping_code_lower.startswith(
(GeneNamespacePrefix.NCBI.value, GeneNamespacePrefix.HGNC.value)
)
):
mappings.append(_add_merged_id_ext(mapping, is_priority=False))
else:
if normalized_id == mapping.coding.code.root:
mappings.append(
_add_merged_id_ext(
mapping,
label=normalizer_resp_obj.label,
is_priority=True,
)
)
else:
mappings.extend(
_add_merged_id_ext(
mapping, label=normalizer_resp_obj.label, is_priority=True
)
for mapping in normalizer_mappings
if normalized_id == mapping.coding.code.root
)
mappings.append(
_update_mapping(mapping, normalized_id, normalizer_label)
)
return mappings

def create_json(self, cdm_filepath: Path | None = None) -> None:
Expand Down
53 changes: 39 additions & 14 deletions src/metakb/transformers/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,20 +816,27 @@ def _add_genes(self, genes: list[dict]) -> None:
:param genes: All genes in CIViC
"""

def _get_ncbi_concept_mapping(ncbigene_id: str, gene: dict) -> ConceptMapping:
    """Get the NCBI Gene concept mapping annotated by CIViC for a gene record

    :param ncbigene_id: ID for NCBI Gene. Stored as the coding ``id``.
    :param gene: CIViC gene record. Its ``entrez_id`` value is converted to a
        string and stored as the coding ``code``.
    :return: Concept Mapping for NCBI Gene, related as an exact match
    """
    # Build the coding separately so the mapping construction stays on one line.
    ncbi_coding = Coding(
        id=ncbigene_id,
        code=str(gene["entrez_id"]),
        system="https://www.ncbi.nlm.nih.gov/gene/",
    )
    return ConceptMapping(coding=ncbi_coding, relation=Relation.EXACT_MATCH)

for gene in genes:
gene_id = f"civic.gid:{gene['id']}"
ncbigene = f"ncbigene:{gene['entrez_id']}"
queries = [ncbigene, gene["name"]] + gene["aliases"]
mappings = [
ConceptMapping(
coding=Coding(
id=ncbigene,
code=str(gene["entrez_id"]),
system="https://www.ncbi.nlm.nih.gov/gene/",
),
relation=Relation.EXACT_MATCH,
),
]
extensions = []

gene_norm_resp, normalized_gene_id = self.vicc_normalizers.normalize_gene(
Expand All @@ -843,13 +850,31 @@ def _add_genes(self, genes: list[dict]) -> None:
queries,
)
extensions.append(self._get_vicc_normalizer_failure_ext())
mappings = [_get_ncbi_concept_mapping(ncbigene, gene)]
else:
mappings.extend(
self._get_vicc_normalizer_mappings(
normalized_gene_id, gene_norm_resp
)
mappings = self._get_vicc_normalizer_mappings(
normalized_gene_id, gene_norm_resp
)

civic_ncbi_annotation_match = False
for mapping in mappings:
if mapping.coding.code.root.startswith("ncbigene:"):
if mapping.coding.code.root == ncbigene:
mapping.extensions.append(
Extension(name="civic_annotation", value=True)
)
civic_ncbi_annotation_match = True
break

_logger.debug(
"CIViC NCBI gene and Gene Normalizer mismatch: %s vs %s",
ncbigene,
mapping.coding.code.root,
)

if not civic_ncbi_annotation_match:
mappings.append(_get_ncbi_concept_mapping(ncbigene, gene))

if gene["aliases"]:
extensions.append(Extension(name="aliases", value=gene["aliases"]))

Expand Down
44 changes: 33 additions & 11 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ def pytest_configure(config):
logging.getLogger(lib).setLevel(logging.ERROR)


def get_vicc_normalizer_ext(is_priority: bool) -> list[dict]:
    """Create test fixture for vicc normalizer priority extension

    :param is_priority: Value stored under the extension's ``value`` key
    :return: Single-element list holding the ``vicc_normalizer_priority``
        extension as a plain dictionary. A new list is built on every call.
    """
    return [{"name": "vicc_normalizer_priority", "value": is_priority}]


def check_source_harvest(tmp_path: Path, harvester: Harvester):
"""Test that source harvest method works correctly"""
harvested_data = harvester.harvest()
Expand Down Expand Up @@ -499,11 +504,14 @@ def civic_gid5(braf_normalizer_mappings):
"mappings": [
{
"coding": {
"id": "ncbigene:673",
"code": "673",
"code": "ncbigene:673",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "exactMatch",
"relation": "relatedMatch",
"extensions": [
*get_vicc_normalizer_ext(is_priority=False),
{"name": "civic_annotation", "value": True},
],
},
*braf_normalizer_mappings,
],
Expand Down Expand Up @@ -732,11 +740,14 @@ def civic_gid19():
"mappings": [
{
"coding": {
"id": "ncbigene:1956",
"code": "1956",
"code": "ncbigene:1956",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "exactMatch",
"relation": "relatedMatch",
"extensions": [
*get_vicc_normalizer_ext(is_priority=False),
{"name": "civic_annotation", "value": True},
],
},
{
"coding": {
Expand Down Expand Up @@ -1568,10 +1579,13 @@ def civic_gid29():
{
"coding": {
"system": "https://www.ncbi.nlm.nih.gov/gene/",
"id": "ncbigene:3815",
"code": "3815",
"code": "ncbigene:3815",
},
"relation": "exactMatch",
"relation": "relatedMatch",
"extensions": [
*get_vicc_normalizer_ext(is_priority=False),
{"name": "civic_annotation", "value": True},
],
},
{
"coding": {
Expand Down Expand Up @@ -1781,8 +1795,16 @@ def moa_abl1():
"system": "https://www.genenames.org",
},
"relation": "exactMatch",
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
}
"extensions": get_vicc_normalizer_ext(is_priority=True),
},
{
"coding": {
"code": "ncbigene:25",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "relatedMatch",
"extensions": get_vicc_normalizer_ext(is_priority=False),
},
],
}

Expand Down
24 changes: 17 additions & 7 deletions tests/unit/transformers/test_civic_transformer_diagnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORMERS_DIR, get_vicc_normalizer_priority_ext
from tests.conftest import (
TEST_TRANSFORMERS_DIR,
get_vicc_normalizer_ext,
get_vicc_normalizer_priority_ext,
)

from metakb.transformers.civic import CivicTransformer

Expand Down Expand Up @@ -184,11 +188,14 @@ def civic_gid38():
"mappings": [
{
"coding": {
"id": "ncbigene:5156",
"code": "5156",
"code": "ncbigene:5156",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "exactMatch",
"relation": "relatedMatch",
"extensions": [
*get_vicc_normalizer_ext(is_priority=False),
{"name": "civic_annotation", "value": True},
],
},
{
"coding": {
Expand Down Expand Up @@ -460,11 +467,14 @@ def civic_gid42():
"mappings": [
{
"coding": {
"id": "ncbigene:5979",
"code": "5979",
"code": "ncbigene:5979",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "exactMatch",
"relation": "relatedMatch",
"extensions": [
*get_vicc_normalizer_ext(is_priority=False),
{"name": "civic_annotation", "value": True},
],
},
{
"coding": {
Expand Down
22 changes: 21 additions & 1 deletion tests/unit/transformers/test_moa_transformer_prognostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORMERS_DIR, get_vicc_normalizer_priority_ext
from tests.conftest import (
TEST_TRANSFORMERS_DIR,
get_vicc_normalizer_ext,
get_vicc_normalizer_priority_ext,
)

from metakb.transformers.moa import MoaTransformer

Expand Down Expand Up @@ -162,6 +166,14 @@ def moa_bcor():
"relation": "exactMatch",
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
},
{
"coding": {
"code": "ncbigene:54880",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "relatedMatch",
"extensions": get_vicc_normalizer_ext(is_priority=False),
},
],
}

Expand Down Expand Up @@ -329,6 +341,14 @@ def moa_sf3b1():
"relation": "exactMatch",
"extensions": get_vicc_normalizer_priority_ext(is_priority=True),
},
{
"coding": {
"code": "ncbigene:23451",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "relatedMatch",
"extensions": get_vicc_normalizer_ext(is_priority=False),
},
],
}

Expand Down
Loading

0 comments on commit 9d96296

Please sign in to comment.