cancervariants · korikuzma · Dec 18, 2024 · Dec 11, 2024 · Dec 13, 2024
diff --git a/src/metakb/harvesters/moa.py b/src/metakb/harvesters/moa.py
@@ -151,24 +151,27 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict
         assertion_record = {
             "id": assertion["assertion_id"],
             "context": assertion["context"],
+            "deprecated": assertion["deprecated"],
             "description": assertion["description"],
             "disease": {
                 "name": assertion["disease"],
                 "oncotree_code": assertion["oncotree_code"],
                 "oncotree_term": assertion["oncotree_term"],
             },
-            "therapy_name": assertion["therapy_name"],
-            "therapy_type": assertion["therapy_type"],
-            "clinical_significance": self._get_therapy(
-                assertion["therapy_resistance"], assertion["therapy_sensitivity"]
-            ),
+            "therapy": {
+                "name": assertion["therapy_name"],
+                "type": assertion["therapy_type"],
+                "strategy": assertion["therapy_strategy"],
+                "resistance": assertion["therapy_resistance"],
+                "sensitivity": assertion["therapy_sensitivity"],
+            },
             "predictive_implication": assertion["predictive_implication"],
             "favorable_prognosis": assertion["favorable_prognosis"],
             "created_on": assertion["created_on"],
             "last_updated": assertion["last_updated"],
             "submitted_by": assertion["submitted_by"],
             "validated": assertion["validated"],
-            "source_ids": assertion["sources"][0]["source_id"],
+            "source_id": assertion["sources"][0]["source_id"],
         }
 
         for v in variants_list:
@@ -177,19 +180,6 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict
 
         return assertion_record
 
-    def _get_therapy(self, resistance: bool, sensitivity: bool) -> str | None:
-        """Get therapy response data.
-
-        :param resistance: `True` if Therapy Resistance. `False` if not Therapy Resistance
-        :param sensitivity: `True` if Therapy Sensitivity. `False` if not Therapy Sensitivity
-        :return: whether the therapy response is resistance or sensitivity
-        """
-        if resistance:
-            return "resistance"
-        if sensitivity:
-            return "sensitivity"
-        return None
-
     def _get_feature(self, v: dict) -> dict:
         """Get feature name from the harvested variants
 

diff --git a/src/metakb/transformers/moa.py b/src/metakb/transformers/moa.py
@@ -8,9 +8,11 @@
 from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint
 from ga4gh.core import sha512t24u
 from ga4gh.core.domain_models import (
+    CombinationTherapy,
     Disease,
     Gene,
     TherapeuticAgent,
+    TherapeuticSubstituteGroup,
 )
 from ga4gh.core.entity_models import (
     Coding,
@@ -21,7 +23,9 @@
 )
 from ga4gh.va_spec.profiles.var_study_stmt import (
     AlleleOriginQualifier,
+    PrognosticPredicate,
     TherapeuticResponsePredicate,
+    VariantPrognosticStudyStatement,
     VariantTherapeuticResponseStudyStatement,
 )
 from ga4gh.vrs.models import Variation
@@ -85,133 +89,97 @@ async def transform(self, harvested_data: MoaHarvestedData) -> None:
         self._add_documents(harvested_data.sources)
 
         # Add variant therapeutic response study statement data. Will update `statements`
-        await self._add_variant_tr_study_stmts(harvested_data.assertions)
+        for assertion in harvested_data.assertions:
+            await self._add_variant_study_stmt(assertion)
 
-    async def _add_variant_tr_study_stmts(self, assertions: list[dict]) -> None:
-        """Create Variant Therapeutic Response Study Statements from MOA assertions.
+    async def _add_variant_study_stmt(self, assertion: dict) -> None:
+        """Create a Variant Study Statement from a MOA assertion.
         Will add associated values to ``processed_data`` instance variable
         (``therapeutic_procedures``, ``conditions``, and ``statements``).
         ``able_to_normalize`` and ``unable_to_normalize`` will
         also be mutated for associated therapeutic_procedures and conditions.
+
+        A blank (``""``) ``favorable_prognosis`` produces a
+        ``VariantTherapeuticResponseStudyStatement``; any other value produces a
+        ``VariantPrognosticStudyStatement``. Nothing is appended to ``statements``
+        when the variant has no cached variation, the disease cannot be added,
+        neither therapy resistance nor sensitivity is set, or no therapeutic
+        procedure is returned for the therapy.
 
-        :param assertions: A list of MOA assertion records
+        :param assertion: MOA assertion record
+        :raises KeyError: If ``predictive_implication`` does not match a
+            ``MoaEvidenceLevel`` member name after normalization
         """
-        for record in assertions:
-            assertion_id = f"moa.assertion:{record['id']}"
-            variant_id = record["variant"]["id"]
+        assertion_id = f"moa.assertion:{assertion['id']}"
+        variant_id = assertion["variant"]["id"]
 
-            # Check cache for variation record (which contains gene information)
-            variation_gene_map = self.able_to_normalize["variations"].get(variant_id)
-            if not variation_gene_map:
-                logger.debug(
-                    "%s has no variation for variant_id %s", assertion_id, variant_id
-                )
-                continue
-
-            # Get predicate. We only support therapeutic resistance/sensitivity
-            if record["clinical_significance"] == "resistance":
-                predicate = TherapeuticResponsePredicate.RESISTANCE
-            elif record["clinical_significance"] == "sensitivity":
-                predicate = TherapeuticResponsePredicate.SENSITIVITY
-            else:
-                logger.debug(
-                    "clinical_significance not supported: %s",
-                    record["clinical_significance"],
-                )
-                continue
+        # Check cache for variation record (which contains gene information)
+        variation_gene_map = self.able_to_normalize["variations"].get(variant_id)
+        if not variation_gene_map:
+            logger.debug(
+                "%s has no variation for variant_id %s", assertion_id, variant_id
+            )
+            return
+
+        # Get strength. The label is normalized into an enum member name
+        # (spaces and hyphens become underscores, then upper-cased).
+        predictive_implication = (
+            assertion["predictive_implication"]
+            .strip()
+            .replace(" ", "_")
+            .replace("-", "_")
+            .upper()
+        )
+        moa_evidence_level = MoaEvidenceLevel[predictive_implication]
+        strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level]
 
-            # Get strength
-            predictive_implication = (
-                record["predictive_implication"]
-                .strip()
-                .replace(" ", "_")
-                .replace("-", "_")
-                .upper()
+        # Add disease
+        moa_disease = self._add_disease(assertion["disease"])
+        if not moa_disease:
+            logger.debug(
+                "%s has no disease for disease %s", assertion_id, assertion["disease"]
             )
-            moa_evidence_level = MoaEvidenceLevel[predictive_implication]
-            strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level]
+            return
 
-            # Add therapeutic agent. We only support one therapy, so we will skip others
-            therapy_name = record["therapy_name"]
-            if not therapy_name:
-                logger.debug("%s has no therapy_name", assertion_id)
-                continue
+        # Add document
+        # NOTE(review): ``.get`` yields None for a source_id that is not cached, so
+        # ``reportedIn`` may become ``[None]`` - confirm this cannot happen upstream.
+        document = self.able_to_normalize["documents"].get(assertion["source_id"])
 
-            therapy_interaction_type = record["therapy_type"]
-
-            if "+" in therapy_name:
-                # Indicates multiple therapies
-                if therapy_interaction_type.upper() in {
-                    "COMBINATION THERAPY",
-                    "IMMUNOTHERAPY",
-                    "RADIATION THERAPY",
-                    "TARGETED THERAPY",
-                }:
-                    therapeutic_procedure_type = (
-                        TherapeuticProcedureType.COMBINATION_THERAPY
-                    )
-                else:
-                    # skipping HORMONE and CHEMOTHERAPY for now
-                    continue
+        feature_type = assertion["variant"]["feature_type"]
+        if feature_type == "somatic_variant":
+            allele_origin_qualifier = AlleleOriginQualifier.SOMATIC
+        elif feature_type == "germline_variant":
+            allele_origin_qualifier = AlleleOriginQualifier.GERMLINE
+        else:
+            allele_origin_qualifier = None
+
+        # Keyword arguments shared by both statement types
+        params = {
+            "id": assertion_id,
+            "description": assertion["description"],
+            "strength": strength,
+            "subjectVariant": variation_gene_map["cv"],
+            "alleleOriginQualifier": allele_origin_qualifier,
+            "geneContextQualifier": variation_gene_map["moa_gene"],
+            "specifiedBy": self.processed_data.methods[0],
+            "reportedIn": [document],
+        }
 
-                therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")]
-                therapeutic_digest = self._get_digest_for_str_lists(
-                    [f"moa.therapy:{tn}" for tn in therapies]
-                )
-                therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}"
-            else:
-                therapeutic_procedure_id = f"moa.therapy:{therapy_name}"
-                therapies = [{"label": therapy_name}]
-                therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT
-
-            moa_therapeutic = self._add_therapeutic_procedure(
-                therapeutic_procedure_id,
-                therapies,
-                therapeutic_procedure_type,
-                therapy_interaction_type,
+        if assertion["favorable_prognosis"] == "":
+            # Therapeutic response. Only an explicitly set (truthy) resistance or
+            # sensitivity flag yields a predicate; a record with neither is skipped
+            # instead of being reported as sensitivity.
+            therapy_response = assertion["therapy"]
+            if therapy_response["resistance"]:
+                predicate = TherapeuticResponsePredicate.RESISTANCE
+            elif therapy_response["sensitivity"]:
+                predicate = TherapeuticResponsePredicate.SENSITIVITY
+            else:
+                logger.debug("%s has no supported therapy response", assertion_id)
+                return
+
+            params["conditionQualifier"] = moa_disease
+            params["predicate"] = predicate
+            params["objectTherapeutic"] = self._get_therapeutic_procedure(
+                assertion
             )
 
-            if not moa_therapeutic:
+            if not params["objectTherapeutic"]:
                 logger.debug(
-                    "%s has no therapeutic agent for therapy_name %s",
+                    "%s has no therapeutic procedure for therapy_name %s",
                     assertion_id,
-                    therapy_name,
-                )
-                continue
-
-            # Add disease
-            moa_disease = self._add_disease(record["disease"])
-            if not moa_disease:
-                logger.debug(
-                    "%s has no disease for disease %s", assertion_id, record["disease"]
+                    assertion["therapy"]["name"],
                 )
-                continue
-
-            # Add document
-            document = self.able_to_normalize["documents"].get(record["source_ids"])
-
-            feature_type = record["variant"]["feature_type"]
-            if feature_type == "somatic_variant":
-                allele_origin_qualifier = AlleleOriginQualifier.SOMATIC
-            elif feature_type == "germline_variant":
-                allele_origin_qualifier = AlleleOriginQualifier.GERMLINE
-            else:
-                allele_origin_qualifier = None
-
-            statement = VariantTherapeuticResponseStudyStatement(
-                id=assertion_id,
-                description=record["description"],
-                strength=strength,
-                predicate=predicate,
-                subjectVariant=variation_gene_map["cv"],
-                objectTherapeutic=moa_therapeutic,
-                conditionQualifier=moa_disease,
-                alleleOriginQualifier=allele_origin_qualifier,
-                geneContextQualifier=variation_gene_map["moa_gene"],
-                specifiedBy=self.processed_data.methods[0],
-                reportedIn=[document],
+                return
+            statement = VariantTherapeuticResponseStudyStatement(**params)
+        else:
+            # Prognostic: a truthy ``favorable_prognosis`` is a better outcome,
+            # any other non-blank value is a worse outcome.
+            params["objectCondition"] = moa_disease
+            params["predicate"] = (
+                PrognosticPredicate.BETTER_OUTCOME
+                if assertion["favorable_prognosis"]
+                else PrognosticPredicate.WORSE_OUTCOME
             )
-            self.processed_data.statements.append(statement)
+            statement = VariantPrognosticStudyStatement(**params)
+
+        self.processed_data.statements.append(statement)
 
     async def _add_categorical_variants(self, variants: list[dict]) -> None:
         """Create Categorical Variant objects for all MOA variant records.
@@ -437,6 +405,54 @@ def _add_documents(self, sources: list) -> None:
             self.able_to_normalize["documents"][source_id] = document
             self.processed_data.documents.append(document)
 
+    def _get_therapeutic_procedure(
+        self, assertion: dict
+    ) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None:
+        """Get the therapeutic procedure object for a MOA assertion.
+
+        A ``+`` in the therapy name marks multiple therapies, which are handled as
+        a combination therapy when the therapy type is one of the supported
+        types. A name without ``+`` is handled as a single therapeutic agent.
+
+        :param assertion: MOA assertion record. Reads ``id`` and the ``name`` and
+            ``type`` keys of its ``therapy`` dict.
+        :return: Value returned by ``_add_therapeutic_procedure``. ``None`` if the
+            assertion has no therapy name, or if it names multiple therapies with
+            a missing or unsupported therapy type.
+        """
+        therapy = assertion["therapy"]
+        therapy_name = therapy["name"]
+        if not therapy_name:
+            # Use the same ``moa.assertion:<id>`` form the caller logs with
+            logger.debug("moa.assertion:%s has no therapy_name", assertion["id"])
+            return None
+
+        therapy_interaction_type = therapy["type"]
+
+        if "+" in therapy_name:
+            # Indicates multiple therapies. A null therapy type is treated like an
+            # unsupported one instead of raising on ``.upper()``.
+            if (therapy_interaction_type or "").upper() in {
+                "COMBINATION THERAPY",
+                "IMMUNOTHERAPY",
+                "RADIATION THERAPY",
+                "TARGETED THERAPY",
+            }:
+                therapeutic_procedure_type = (
+                    TherapeuticProcedureType.COMBINATION_THERAPY
+                )
+            else:
+                # skipping HORMONE and CHEMOTHERAPY for now
+                return None
+
+            therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")]
+            # NOTE(review): ``tn`` is a ``{"label": ...}`` dict here, so the digest
+            # input is the dict's string form rather than the bare label. Left
+            # unchanged because it determines the emitted ``moa.ctid`` IDs - confirm
+            # whether ``tn["label"]`` was intended.
+            therapeutic_digest = self._get_digest_for_str_lists(
+                [f"moa.therapy:{tn}" for tn in therapies]
+            )
+            therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}"
+        else:
+            therapeutic_procedure_id = f"moa.therapy:{therapy_name}"
+            therapies = [{"label": therapy_name}]
+            therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT
+
+        return self._add_therapeutic_procedure(
+            therapeutic_procedure_id,
+            therapies,
+            therapeutic_procedure_type,
+            therapy_interaction_type,
+        )
+
     def _get_therapeutic_substitute_group(
         self,
         therapeutic_sub_group_id: str,