Skip to content

Commit

Permalink
add mappings for copy change
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Dec 31, 2024
1 parent 23dbe8f commit 2ee3cdf
Show file tree
Hide file tree
Showing 10 changed files with 351 additions and 137 deletions.
7 changes: 5 additions & 2 deletions src/variation/hgvs_dup_del_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import Extension, MappableConcept
from ga4gh.core.models import (
Extension,
)
from ga4gh.vrs import models, normalize

from variation.schemas.normalize_response_schema import HGVSDupDelModeOption
from variation.schemas.token_response_schema import AMBIGUOUS_REGIONS, AltType
from variation.utils import get_copy_change

# Define deletion alt types
DELS = {AltType.DELETION_AMBIGUOUS, AltType.DELETION}
Expand Down Expand Up @@ -144,7 +147,7 @@ def copy_number_change_mode(
seq_loc.id = ga4gh_identify(seq_loc)
cx = models.CopyNumberChange(
location=seq_loc,
copyChange=MappableConcept(primaryCode=copy_change),
copyChange=get_copy_change(copy_change),
extensions=extensions,
)
cx.id = ga4gh_identify(cx)
Expand Down
26 changes: 24 additions & 2 deletions src/variation/schemas/copy_number_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,18 @@ class ParsedToCxVarService(ServiceResponse):
"start": 10000,
"end": 1223133,
},
"copyChange": {"primaryCode": "EFO:0030069"},
"copyChange": {
"primaryCode": "EFO:0030069",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030069",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"name": "variation-normalizer",
Expand Down Expand Up @@ -393,7 +404,18 @@ class AmplificationToCxVarService(ServiceResponse):
"start": 140713327,
"end": 140924929,
},
"copyChange": {"primaryCode": "EFO:0030072"},
"copyChange": {
"primaryCode": "EFO:0030072",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030072",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"version": __version__,
Expand Down
13 changes: 12 additions & 1 deletion src/variation/schemas/hgvs_to_copy_number_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,18 @@ class HgvsToCopyNumberChangeService(ServiceResponse):
"start": 49531261,
"end": 49531262,
},
"copyChange": {"primaryCode": "EFO:0030069"},
"copyChange": {
"primaryCode": "EFO:0030069",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030069",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"name": "variation-normalizer",
Expand Down
13 changes: 7 additions & 6 deletions src/variation/to_copy_number_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from cool_seq_tool.schemas import Assembly
from cool_seq_tool.sources import UtaDatabase
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import MappableConcept
from ga4gh.vrs import models
from gene.query import QueryHandler as GeneQueryHandler
from gene.schemas import MatchType as GeneMatchType
Expand Down Expand Up @@ -43,7 +42,11 @@
from variation.to_vrs import ToVRS
from variation.tokenize import Tokenize
from variation.translate import Translate
from variation.utils import get_priority_sequence_location, get_vrs_loc_seq
from variation.utils import (
get_copy_change,
get_priority_sequence_location,
get_vrs_loc_seq,
)
from variation.validate import Validate

VALID_CLASSIFICATION_TYPES = [
Expand Down Expand Up @@ -605,7 +608,7 @@ def parsed_to_copy_number(
if is_cx:
variation = models.CopyNumberChange(
location=seq_loc,
copyChange=MappableConcept(primaryCode=request_body.copy_change),
copyChange=get_copy_change(request_body.copy_change),
)
variation.id = ga4gh_identify(variation)
else:
Expand Down Expand Up @@ -717,9 +720,7 @@ def amplification_to_cx_var(
vrs_location.id = ga4gh_identify(vrs_location)
vrs_cx = models.CopyNumberChange(
location=vrs_location,
copyChange=MappableConcept(
primaryCode=models.CopyChange.EFO_0030072.value
),
copyChange=get_copy_change(models.CopyChange.EFO_0030072),
)
vrs_cx.id = ga4gh_identify(vrs_cx)
variation = models.CopyNumberChange(
Expand Down
5 changes: 2 additions & 3 deletions src/variation/translators/amplification.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Module for Amplification Translation."""

from ga4gh.core import ga4gh_identify
from ga4gh.core.models import MappableConcept
from ga4gh.vrs import models

from variation.schemas.app_schemas import Endpoint
Expand All @@ -10,7 +9,7 @@
from variation.schemas.translation_response_schema import TranslationResult
from variation.schemas.validation_response_schema import ValidationResult
from variation.translators.translator import Translator
from variation.utils import get_priority_sequence_location
from variation.utils import get_copy_change, get_priority_sequence_location


class Amplification(Translator):
Expand Down Expand Up @@ -53,7 +52,7 @@ async def translate(
if priority_seq_loc:
vrs_cx = models.CopyNumberChange(
location=models.SequenceLocation(**priority_seq_loc),
copyChange=MappableConcept(primaryCode=models.CopyChange.EFO_0030072),
copyChange=get_copy_change(models.CopyChange.EFO_0030072),
)
vrs_cx.id = ga4gh_identify(vrs_cx)
vrs_cx = vrs_cx.model_dump(exclude_none=True)
Expand Down
20 changes: 19 additions & 1 deletion src/variation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bioutils.sequences import aa3_to_aa1 as _aa3_to_aa1
from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core.models import MappableConcept
from ga4gh.core.models import Coding, ConceptMapping, MappableConcept, Relation
from ga4gh.vrs import models

from variation.schemas.app_schemas import AmbiguousRegexType
Expand Down Expand Up @@ -237,3 +237,21 @@ def get_vrs_loc_seq(
else:
ref = None
return ref or None # get_reference_sequence can return empty str


def get_copy_change(efo_code: models.CopyChange) -> MappableConcept:
    """Build the mappable concept describing a copy change for an EFO code.

    The given code is used twice: as the concept's primary code, and as the code
    of a single EFO coding attached to the concept through an exactMatch mapping.

    :param efo_code: EFO code represented as a CURIE
    :return: Mappable concept for the EFO code, carrying one exactMatch mapping
        whose coding uses the EFO system URL
    """
    # Coding that points back at the same EFO code within the EFO system
    efo_coding = Coding(code=efo_code, system="https://www.ebi.ac.uk/efo/")
    exact_match = ConceptMapping(relation=Relation.EXACT_MATCH, coding=efo_coding)
    return MappableConcept(primaryCode=efo_code, mappings=[exact_match])
72 changes: 55 additions & 17 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from variation.classify import Classify
from variation.query import QueryHandler
from variation.schemas.normalize_response_schema import NormalizeService
from variation.tokenize import Tokenize
from variation.tokenizers import GeneSymbol

Expand Down Expand Up @@ -606,6 +607,21 @@ def _vrs_id_and_digest_existence_checks(vrs_obj_dict, prefix=None):
assert location_vrs_id == f"ga4gh:SL.{location_vrs_digest}"


def _mane_gene_ext_checks(actual_vo: dict) -> None:
"""Check mane gene extensions existence
:param actual_vo: Actual VRS object represented as a dictionary
"""
extensions = actual_vo.pop("extensions")
assert len(extensions) == 1

mane_genes_ext = extensions[0]
assert mane_genes_ext["name"] == "mane_genes"
for mane_gene in mane_genes_ext["value"]:
assert mane_gene["ncbi_gene_id"]
assert mane_gene["symbol"]


def assertion_checks(
normalize_response, test_variation, mane_genes_exts=False, check_vrs_id=False
):
Expand All @@ -616,14 +632,7 @@ def assertion_checks(

# Check MANE genes existence
if mane_genes_exts:
extensions = actual.pop("extensions")
assert len(extensions) == 1

mane_genes_ext = extensions[0]
assert mane_genes_ext["name"] == "mane_genes"
for mane_gene in mane_genes_ext["value"]:
assert mane_gene["ncbi_gene_id"]
assert mane_gene["symbol"]
_mane_gene_ext_checks(actual)

expected = test_variation.model_copy().model_dump(exclude_none=True)
if not check_vrs_id:
Expand All @@ -633,21 +642,50 @@ def assertion_checks(
assert actual == expected, "variation"


def cnv_assertion_checks(resp, test_fixture, check_vrs_id=False):
def cnv_assertion_checks(resp, test_fixture, check_vrs_id=False, mane_genes_exts=False):
"""Check that actual response for to copy number matches expected"""
try:
cnc = resp.copy_number_count
except AttributeError:
actual = resp.copy_number_change.model_dump(exclude_none=True)
prefix = "ga4gh:CX."

def _update_expected_mappings(expected_):
"""Modify test fixture copy to include mappable concept object for CX var"""
expected_["copyChange"]["mappings"] = [
{
"relation": "exactMatch",
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": expected_["copyChange"]["primaryCode"],
},
}
]

expected = test_fixture.model_copy(deep=True).model_dump(exclude_none=True)

if isinstance(resp, NormalizeService):
actual = resp.variation
if isinstance(actual, models.CopyNumberChange):
_update_expected_mappings(expected)
prefix = "ga4gh:CX."
elif isinstance(actual, models.CopyNumberCount):
prefix = "ga4gh:CN."

actual = actual.model_dump(exclude_none=True)
else:
actual = cnc.model_dump(exclude_none=True)
prefix = "ga4gh:CN."
try:
cnc = resp.copy_number_count
except AttributeError:
_update_expected_mappings(expected)
actual = resp.copy_number_change.model_dump(exclude_none=True)
prefix = "ga4gh:CX."
else:
actual = cnc.model_dump(exclude_none=True)
prefix = "ga4gh:CN."

# Check MANE genes existence
if mane_genes_exts:
_mane_gene_ext_checks(actual)

if not check_vrs_id:
_vrs_id_and_digest_existence_checks(actual, prefix=prefix)

expected = test_fixture.model_copy().model_dump(exclude_none=True)
if not check_vrs_id:
_delete_id_and_digest(expected)
_delete_id_and_digest(expected["location"])
Expand Down
Loading

0 comments on commit 2ee3cdf

Please sign in to comment.