Skip to content

Commit

Permalink
feat!: moa disease conflict resolution should follow therapy conflict…
Browse files Browse the repository at this point in the history
… resolution
  • Loading branch information
korikuzma committed Feb 3, 2025
1 parent 94a78a7 commit 6faa759
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 51 deletions.
114 changes: 72 additions & 42 deletions src/metakb/transformers/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,65 @@ def _get_therapeutic_substitute_group(
:return: None, since not supported by MOA
"""

def _resolve_concept_discrepancy(
    self,
    cached_id: str,
    cached_obj: MappableConcept,
    cached_label: str,
    moa_concept_label: str,
    is_disease: bool = False,
) -> None:
    """Resolve conflict where two MOA disease or therapy labels resolve to the same
    normalized concept

    The min label (string comparison) will be used as the primary label for the
    mappable concept, and the other label will be recorded in the ``aliases``
    extension. An alias is only recorded once, so calling this method repeatedly
    with the same pair of labels does not produce duplicate aliases.

    The cache will be updated with the updated object.
    The cached object will be removed from ``self.processed_data``; callers are
    expected to add the updated object back.

    :param cached_id: ID found in cache
    :param cached_obj: Mappable concept found in cache for ``cached_id``. This will
        be mutated
    :param cached_label: Label for ``cached_obj``
    :param moa_concept_label: MOA concept label
    :param is_disease: ``True`` if ``cached_obj`` is a disease. ``False`` if
        ``cached_obj`` is a therapy
    """
    logger.debug(
        "MOA %s and %s resolve to same concept %s",
        moa_concept_label,
        cached_label,
        cached_id,
    )
    # Deterministic choice that does not depend on the order records are seen in
    primary_label = min(moa_concept_label, cached_label)
    alias = max(moa_concept_label, cached_label)
    cached_obj.label = primary_label
    extensions = cached_obj.extensions or []

    aliases_ext = next(
        (ext for ext in extensions if ext.name == "aliases"),
        None,
    )
    if aliases_ext:
        # The primary label must not also be listed as its own alias
        if primary_label in aliases_ext.value:
            aliases_ext.value.remove(primary_label)
        # Guard against duplicates: the same conflicting label can be seen many
        # times (e.g. once per record that uses it)
        if alias not in aliases_ext.value:
            aliases_ext.value.append(alias)
    else:
        extensions.append(Extension(name="aliases", value=[alias]))
        # ``extensions`` may be a brand new list here, so attach it to the object
        cached_obj.extensions = extensions

    if is_disease:
        self.processed_data.conditions = [
            c for c in self.processed_data.conditions if c.id != cached_obj.id
        ]
        cache = self._cache.conditions
    else:
        self.processed_data.therapies = [
            t for t in self.processed_data.therapies if t.id != cached_id
        ]
        cache = self._cache.normalized_therapies

    cache[cached_id] = cached_obj

def _get_therapy(self, therapy_id: str, therapy: dict) -> MappableConcept:
"""Get Therapy mappable concept for a MOA therapy name.
Expand Down Expand Up @@ -525,37 +584,13 @@ def _resolve_therapy_discrepancy(
therapy_norm_obj = self._cache.normalized_therapies[cached_id]
og_therapy_norm_label = therapy_norm_obj.label
if moa_concept_label != og_therapy_norm_label:
logger.debug(
"MOA therapy %s and %s resolve to same concept %s",
moa_concept_label,
og_therapy_norm_label,
self._resolve_concept_discrepancy(
cached_id,
therapy_norm_obj,
og_therapy_norm_label,
moa_concept_label,
is_disease=False,
)
alias = max(moa_concept_label, og_therapy_norm_label)
therapy_norm_obj.label = min(moa_concept_label, og_therapy_norm_label)
extensions = therapy_norm_obj.extensions or []

aliases_ext = next(
(
ext
for ext in therapy_norm_obj.extensions
if ext.name == "aliases"
),
None,
)
if aliases_ext:
if therapy_norm_obj.label in aliases_ext.value:
aliases_ext.value.remove(therapy_norm_obj.label)
aliases_ext.value.append(alias)
else:
extensions.append(Extension(name="aliases", value=[alias]))
therapy_norm_obj.extensions = extensions

# Remove from processed (it will be added back in _add_therapy)
self.processed_data.therapies = [
t for t in self.processed_data.therapies if t.id != cached_id
]
self._cache.normalized_therapies[cached_id] = therapy_norm_obj
return therapy_norm_obj

mappings = []
Expand Down Expand Up @@ -630,23 +665,18 @@ def _add_disease(self, disease: dict) -> MappableConcept | None:
oncotree_kv = [f"{oncotree_key}:{oncotree_value}"]
blob = json.dumps(oncotree_kv, separators=(",", ":")).encode("ascii")
disease_id = sha512t24u(blob)

moa_disease = self._cache.conditions.get(disease_id)
if moa_disease:
source_disease_name = disease["name"]
if source_disease_name != moa_disease.label:
if not moa_disease.extensions:
moa_disease.extensions = [
Extension(name="aliases", value=[source_disease_name])
]
else:
for ext in moa_disease.extensions:
if (
ext.name == "aliases"
and source_disease_name not in ext.value
):
ext.value.append(source_disease_name)
break
self._resolve_concept_discrepancy(
disease_id,
moa_disease,
moa_disease.label,
source_disease_name,
is_disease=True,
)
self.processed_data.conditions.append(moa_disease)
return moa_disease

moa_disease = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
"deprecated": false,
"description": "The U.S. Food and Drug Administration (FDA) granted approval to selpercatinib for the treatment of adult patients with locally advanced or metastatic non-small cell lung cancer (NSCLC) with a RET gene fusion, as detected by an FDA-approved test.",
"disease": {
"name": "Non-Small Cell Lung Cancer",
"oncotree_code": "NSCLC",
"oncotree_term": "Non-Small Cell Lung Cancer"
"name": "Myelodysplastic Syndromes",
"oncotree_code": "MDS",
"oncotree_term": "Myelodysplastic Syndromes"
},
"therapy": {
"name": "Selpercatinib",
Expand Down Expand Up @@ -40,9 +40,9 @@
"deprecated": false,
"description": "Certain missesnse mutations may predict resistance to RET inhibitors",
"disease": {
"name": "Medullary Thyroid Cancer",
"oncotree_code": "THME",
"oncotree_term": "Medullary Thyroid Cancer"
"name": "Myelodysplasia",
"oncotree_code": "MDS",
"oncotree_term": "Myelodysplasia"
},
"therapy": {
"name": "LOXO-292",
Expand Down
21 changes: 18 additions & 3 deletions tests/unit/transformers/test_moa_transformer_therapeutic.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,11 @@ def test_moa_cdm_not_normalizable(


@pytest.mark.asyncio()
async def test_moa_therapy_conflict(normalizers):
"""Test that MOA therapy conflict merges concept correctly"""
async def test_moa_concept_conflicts(normalizers):
"""Test that MOA therapy and disease conflict resolution works correctly"""
t = MoaTransformer(
data_dir=DATA_DIR,
harvester_path=DATA_DIR / "moa_harvester_conflict.json",
harvester_path=DATA_DIR / "moa_harvester_conflicts.json",
normalizers=normalizers,
)
harvested_data = t.extract_harvested_data()
Expand All @@ -428,3 +428,18 @@ async def test_moa_therapy_conflict(normalizers):
"name": "aliases",
"value": ["Selpercatinib"],
}

conditions = t.processed_data.conditions
assert len(conditions) == 1

condition = conditions[0]
assert condition.id == "moa.normalize.disease.ncit:C3247"
assert condition.label == "Myelodysplasia"
condition_alias_ext = next(
(ext for ext in condition.extensions if ext.name == "aliases"),
None,
)
assert condition_alias_ext.model_dump(exclude_none=True) == {
"name": "aliases",
"value": ["Myelodysplastic Syndromes"],
}

0 comments on commit 6faa759

Please sign in to comment.