Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add support for moa prognostic assertions #411

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 9 additions & 19 deletions src/metakb/harvesters/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,24 +151,27 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict
assertion_record = {
"id": assertion["assertion_id"],
"context": assertion["context"],
"deprecated": assertion["deprecated"],
"description": assertion["description"],
"disease": {
"name": assertion["disease"],
"oncotree_code": assertion["oncotree_code"],
"oncotree_term": assertion["oncotree_term"],
},
"therapy_name": assertion["therapy_name"],
"therapy_type": assertion["therapy_type"],
"clinical_significance": self._get_therapy(
assertion["therapy_resistance"], assertion["therapy_sensitivity"]
),
"therapy": {
"name": assertion["therapy_name"],
"type": assertion["therapy_type"],
"strategy": assertion["therapy_strategy"],
"resistance": assertion["therapy_resistance"],
"sensitivity": assertion["therapy_sensitivity"],
},
"predictive_implication": assertion["predictive_implication"],
"favorable_prognosis": assertion["favorable_prognosis"],
"created_on": assertion["created_on"],
"last_updated": assertion["last_updated"],
"submitted_by": assertion["submitted_by"],
"validated": assertion["validated"],
"source_ids": assertion["sources"][0]["source_id"],
"source_id": assertion["sources"][0]["source_id"],
}

for v in variants_list:
Expand All @@ -177,19 +180,6 @@ def _harvest_assertion(self, assertion: dict, variants_list: list[dict]) -> dict

return assertion_record

def _get_therapy(self, resistance: bool, sensitivity: bool) -> str | None:
"""Get therapy response data.

:param resistance: `True` if Therapy Resistance. `False` if not Therapy Resistance
:param sensitivity: `True` if Therapy Sensitivity. `False` if not Therapy Sensitivity
:return: whether the therapy response is resistance or sensitivity
"""
if resistance:
return "resistance"
if sensitivity:
return "sensitivity"
return None

def _get_feature(self, v: dict) -> dict:
"""Get feature name from the harvested variants

Expand Down
232 changes: 124 additions & 108 deletions src/metakb/transformers/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from ga4gh.cat_vrs.core_models import CategoricalVariant, DefiningContextConstraint
from ga4gh.core import sha512t24u
from ga4gh.core.domain_models import (
CombinationTherapy,
Disease,
Gene,
TherapeuticAgent,
TherapeuticSubstituteGroup,
)
from ga4gh.core.entity_models import (
Coding,
Expand All @@ -21,7 +23,9 @@
)
from ga4gh.va_spec.profiles.var_study_stmt import (
AlleleOriginQualifier,
PrognosticPredicate,
TherapeuticResponsePredicate,
VariantPrognosticStudyStatement,
VariantTherapeuticResponseStudyStatement,
)
from ga4gh.vrs.models import Variation
Expand Down Expand Up @@ -85,133 +89,97 @@ async def transform(self, harvested_data: MoaHarvestedData) -> None:
self._add_documents(harvested_data.sources)

# Add variant study statement data (therapeutic response and prognostic). Will update `statements`
await self._add_variant_tr_study_stmts(harvested_data.assertions)
for assertion in harvested_data.assertions:
await self._add_variant_study_stmt(assertion)

async def _add_variant_tr_study_stmts(self, assertions: list[dict]) -> None:
"""Create Variant Therapeutic Response Study Statements from MOA assertions.
async def _add_variant_study_stmt(self, assertion: dict) -> None:
"""Create Variant Study Statements from MOA assertions.
Will add associated values to ``processed_data`` instance variable
(``therapeutic_procedures``, ``conditions``, and ``statements``).
``able_to_normalize`` and ``unable_to_normalize`` will
also be mutated for associated therapeutic_procedures and conditions.

:param assertions: A list of MOA assertion records
:param assertion: MOA assertion record
"""
for record in assertions:
assertion_id = f"moa.assertion:{record['id']}"
variant_id = record["variant"]["id"]
assertion_id = f"moa.assertion:{assertion['id']}"
variant_id = assertion["variant"]["id"]

# Check cache for variation record (which contains gene information)
variation_gene_map = self.able_to_normalize["variations"].get(variant_id)
if not variation_gene_map:
logger.debug(
"%s has no variation for variant_id %s", assertion_id, variant_id
)
continue

# Get predicate. We only support therapeutic resistance/sensitivity
if record["clinical_significance"] == "resistance":
predicate = TherapeuticResponsePredicate.RESISTANCE
elif record["clinical_significance"] == "sensitivity":
predicate = TherapeuticResponsePredicate.SENSITIVITY
else:
logger.debug(
"clinical_significance not supported: %s",
record["clinical_significance"],
)
continue
# Check cache for variation record (which contains gene information)
variation_gene_map = self.able_to_normalize["variations"].get(variant_id)
if not variation_gene_map:
logger.debug(
"%s has no variation for variant_id %s", assertion_id, variant_id
)
return

# Get strength
predictive_implication = (
assertion["predictive_implication"]
.strip()
.replace(" ", "_")
.replace("-", "_")
.upper()
)
moa_evidence_level = MoaEvidenceLevel[predictive_implication]
strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level]

# Get strength
predictive_implication = (
record["predictive_implication"]
.strip()
.replace(" ", "_")
.replace("-", "_")
.upper()
# Add disease
moa_disease = self._add_disease(assertion["disease"])
if not moa_disease:
logger.debug(
"%s has no disease for disease %s", assertion_id, assertion["disease"]
)
moa_evidence_level = MoaEvidenceLevel[predictive_implication]
strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level]
return

# Add therapeutic agent. We only support one therapy, so we will skip others
therapy_name = record["therapy_name"]
if not therapy_name:
logger.debug("%s has no therapy_name", assertion_id)
continue
# Add document
document = self.able_to_normalize["documents"].get(assertion["source_id"])

therapy_interaction_type = record["therapy_type"]

if "+" in therapy_name:
# Indicates multiple therapies
if therapy_interaction_type.upper() in {
"COMBINATION THERAPY",
"IMMUNOTHERAPY",
"RADIATION THERAPY",
"TARGETED THERAPY",
}:
therapeutic_procedure_type = (
TherapeuticProcedureType.COMBINATION_THERAPY
)
else:
# skipping HORMONE and CHEMOTHERAPY for now
continue
feature_type = assertion["variant"]["feature_type"]
if feature_type == "somatic_variant":
allele_origin_qualifier = AlleleOriginQualifier.SOMATIC
elif feature_type == "germline_variant":
allele_origin_qualifier = AlleleOriginQualifier.GERMLINE
else:
allele_origin_qualifier = None

params = {
"id": assertion_id,
"description": assertion["description"],
"strength": strength,
"subjectVariant": variation_gene_map["cv"],
"alleleOriginQualifier": allele_origin_qualifier,
"geneContextQualifier": variation_gene_map["moa_gene"],
"specifiedBy": self.processed_data.methods[0],
"reportedIn": [document],
}

therapies = [{"label": tn.strip()} for tn in therapy_name.split("+")]
therapeutic_digest = self._get_digest_for_str_lists(
[f"moa.therapy:{tn}" for tn in therapies]
)
therapeutic_procedure_id = f"moa.ctid:{therapeutic_digest}"
else:
therapeutic_procedure_id = f"moa.therapy:{therapy_name}"
therapies = [{"label": therapy_name}]
therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_AGENT

moa_therapeutic = self._add_therapeutic_procedure(
therapeutic_procedure_id,
therapies,
therapeutic_procedure_type,
therapy_interaction_type,
if assertion["favorable_prognosis"] == "":
params["conditionQualifier"] = moa_disease
params["predicate"] = (
TherapeuticResponsePredicate.RESISTANCE
if assertion["therapy"]["resistance"]
else TherapeuticResponsePredicate.SENSITIVITY
)
params["objectTherapeutic"] = self._get_therapeutic_procedure(assertion)

if not moa_therapeutic:
if not params["objectTherapeutic"]:
logger.debug(
"%s has no therapeutic agent for therapy_name %s",
"%s has no therapeutic procedure for therapy_name %s",
assertion_id,
therapy_name,
)
continue

# Add disease
moa_disease = self._add_disease(record["disease"])
if not moa_disease:
logger.debug(
"%s has no disease for disease %s", assertion_id, record["disease"]
assertion["therapy"]["name"],
)
continue

# Add document
document = self.able_to_normalize["documents"].get(record["source_ids"])

feature_type = record["variant"]["feature_type"]
if feature_type == "somatic_variant":
allele_origin_qualifier = AlleleOriginQualifier.SOMATIC
elif feature_type == "germline_variant":
allele_origin_qualifier = AlleleOriginQualifier.GERMLINE
else:
allele_origin_qualifier = None

statement = VariantTherapeuticResponseStudyStatement(
id=assertion_id,
description=record["description"],
strength=strength,
predicate=predicate,
subjectVariant=variation_gene_map["cv"],
objectTherapeutic=moa_therapeutic,
conditionQualifier=moa_disease,
alleleOriginQualifier=allele_origin_qualifier,
geneContextQualifier=variation_gene_map["moa_gene"],
specifiedBy=self.processed_data.methods[0],
reportedIn=[document],
return
statement = VariantTherapeuticResponseStudyStatement(**params)
else:
params["objectCondition"] = moa_disease
params["predicate"] = (
PrognosticPredicate.BETTER_OUTCOME
if assertion["favorable_prognosis"]
else PrognosticPredicate.WORSE_OUTCOME
)
self.processed_data.statements.append(statement)
statement = VariantPrognosticStudyStatement(**params)

self.processed_data.statements.append(statement)

async def _add_categorical_variants(self, variants: list[dict]) -> None:
"""Create Categorical Variant objects for all MOA variant records.
Expand Down Expand Up @@ -437,6 +405,54 @@ def _add_documents(self, sources: list) -> None:
self.able_to_normalize["documents"][source_id] = document
self.processed_data.documents.append(document)

def _get_therapeutic_procedure(
    self, assertion: dict
) -> TherapeuticAgent | TherapeuticSubstituteGroup | CombinationTherapy | None:
    """Get therapeutic procedure object for a MOA assertion.

    A ``+`` in the therapy name is treated as the separator between the members
    of a multi-therapy regimen; any other name is handled as a single
    therapeutic agent.

    :param assertion: MOA assertion record. Must provide ``id`` and a ``therapy``
        mapping with ``name`` and ``type`` keys
    :return: The result of ``_add_therapeutic_procedure`` for the therapy, or
        ``None`` when the record has no therapy name or when a multi-therapy
        record has a missing or unsupported therapy type
    """
    therapy = assertion["therapy"]
    therapy_name = therapy["name"]
    if not therapy_name:
        logger.debug("%s has no therapy_name", assertion["id"])
        return None

    therapy_interaction_type = therapy["type"]

    if "+" not in therapy_name:
        return self._add_therapeutic_procedure(
            f"moa.therapy:{therapy_name}",
            [{"label": therapy_name}],
            TherapeuticProcedureType.THERAPEUTIC_AGENT,
            therapy_interaction_type,
        )

    # Indicates multiple therapies. A missing type is treated like an
    # unsupported one instead of raising AttributeError on ``None.upper()``.
    if (therapy_interaction_type or "").upper() not in {
        "COMBINATION THERAPY",
        "IMMUNOTHERAPY",
        "RADIATION THERAPY",
        "TARGETED THERAPY",
    }:
        # skipping HORMONE and CHEMOTHERAPY for now
        return None

    therapy_labels = [tn.strip() for tn in therapy_name.split("+")]

    # Build the digest from the therapy names themselves so each member id has
    # the same ``moa.therapy:<name>`` shape used for single agents above.
    # Interpolating the ``{"label": ...}`` dicts would hash their repr instead.
    therapeutic_digest = self._get_digest_for_str_lists(
        [f"moa.therapy:{label}" for label in therapy_labels]
    )

    return self._add_therapeutic_procedure(
        f"moa.ctid:{therapeutic_digest}",
        [{"label": label} for label in therapy_labels],
        TherapeuticProcedureType.COMBINATION_THERAPY,
        therapy_interaction_type,
    )

def _get_therapeutic_substitute_group(
self,
therapeutic_sub_group_id: str,
Expand Down
Loading
Loading