Skip to content

Commit

Permalink
feat!: update models to vrs 2.0.0 community review ballot (#582)
Browse files Browse the repository at this point in the history
close #581 

* Update modules to the VRS [2.0.0-ballot.2024-11.3](https://github.com/ga4gh/vrs/tree/2.0.0-ballot.2024-11.3) tag
  * `CopyNumberChange` has `mappings` for EFO code
  • Loading branch information
korikuzma authored Jan 2, 2025
1 parent 98cadad commit cc47bb3
Show file tree
Hide file tree
Showing 19 changed files with 478 additions and 253 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ dependencies = [
"fastapi",
"uvicorn",
"pydantic ==2.*",
"ga4gh.vrs[extras] ~= 2.0.0a10",
"gene-normalizer ~=0.4.0",
"ga4gh.vrs[extras] ==2.0.0a13",
"gene-normalizer ~=0.6.0",
"boto3",
"cool-seq-tool ~=0.6.0",
"bioutils"
Expand Down
7 changes: 4 additions & 3 deletions src/variation/gnomad_vcf_to_protein_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.schemas import Strand
from ga4gh.core import domain_models, ga4gh_identify
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import MappableConcept
from ga4gh.vrs import models, normalize
from gene.query import QueryHandler as GeneQueryHandler
from gene.schemas import MatchType as GeneMatchType
Expand Down Expand Up @@ -413,14 +414,14 @@ def _get_protein_representation(
self.seqrepo_access, p_ac, variation.location.start, variation.location.end
)
if loc_seq:
variation.location.sequence = models.SequenceString(root=loc_seq)
variation.location.sequence = models.sequenceString(root=loc_seq)

# Add VRS digests for VRS Allele and VRS Sequence Location
variation.id = ga4gh_identify(variation)
variation.location.id = ga4gh_identify(variation.location)
return variation

def _get_gene_context(self, gene: str) -> domain_models.Gene | None:
def _get_gene_context(self, gene: str) -> MappableConcept | None:
"""Get additional gene information from gene-normalizer
:param gene: Gene symbol
Expand Down
20 changes: 13 additions & 7 deletions src/variation/hgvs_dup_del_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import entity_models, ga4gh_identify
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import (
Extension,
)
from ga4gh.vrs import models, normalize

from variation.schemas.normalize_response_schema import HGVSDupDelModeOption
from variation.schemas.token_response_schema import AMBIGUOUS_REGIONS, AltType
from variation.utils import get_copy_change_concept

# Define deletion alt types
DELS = {AltType.DELETION_AMBIGUOUS, AltType.DELETION}
Expand Down Expand Up @@ -49,7 +53,7 @@ def default_mode(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict | None:
"""Use default characteristics to return a variation.
If baseline_copies not provided and endpoints are ambiguous - copy_number_change
Expand Down Expand Up @@ -92,7 +96,7 @@ def copy_number_count_mode(
alt_type: AltType,
location: dict,
baseline_copies: int,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict:
"""Return a VRS Copy Number Variation.
Expand All @@ -119,7 +123,7 @@ def copy_number_change_mode(
alt_type: AltType,
location: dict,
copy_change: models.CopyChange | None = None,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict:
"""Return copy number change variation
Expand All @@ -142,7 +146,9 @@ def copy_number_change_mode(
seq_loc = models.SequenceLocation(**location)
seq_loc.id = ga4gh_identify(seq_loc)
cx = models.CopyNumberChange(
location=seq_loc, copyChange=copy_change, extensions=extensions
location=seq_loc,
copyChange=get_copy_change_concept(copy_change),
extensions=extensions,
)
cx.id = ga4gh_identify(cx)
return cx.model_dump(exclude_none=True)
Expand All @@ -153,7 +159,7 @@ def allele_mode(
alt_type: AltType,
vrs_seq_loc_ac: str,
alt: str,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict | None:
"""Return a VRS Allele with a normalized LiteralSequenceExpression or
ReferenceLengthExpression.
Expand Down Expand Up @@ -208,7 +214,7 @@ def interpret_variation(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict:
"""Interpret variation using HGVSDupDelMode
Expand Down
34 changes: 28 additions & 6 deletions src/variation/schemas/copy_number_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@ class ParsedToCxVarService(ServiceResponse):
"example": {
"copy_number_change": {
"type": "CopyNumberChange",
"id": "ga4gh:CX.5kaJC-7Jj851bfJ6EipsHV413feg1T4T",
"digest": "5kaJC-7Jj851bfJ6EipsHV413feg1T4T",
"id": "ga4gh:CX.XIsVHbhEUbXraIgpgV4ToCa-6oZWMRUD",
"digest": "XIsVHbhEUbXraIgpgV4ToCa-6oZWMRUD",
"location": {
"type": "SequenceLocation",
"id": "ga4gh:SL.Iz_azSFTEulx7tCluLgGhE1n0hTLUocb",
Expand All @@ -337,7 +337,18 @@ class ParsedToCxVarService(ServiceResponse):
"start": 10000,
"end": 1223133,
},
"copyChange": "efo:0030069",
"copyChange": {
"primaryCode": "EFO:0030069",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030069",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"name": "variation-normalizer",
Expand Down Expand Up @@ -379,8 +390,8 @@ class AmplificationToCxVarService(ServiceResponse):
},
"amplification_label": "BRAF Amplification",
"copy_number_change": {
"id": "ga4gh:CX._UsXDMCLtPwsVKiNByhbwfS569K1wLWW",
"digest": "_UsXDMCLtPwsVKiNByhbwfS569K1wLWW",
"id": "ga4gh:CX.uPQaLz6KSwXWdsjNUZ5kRn3znBZF5YwV",
"digest": "uPQaLz6KSwXWdsjNUZ5kRn3znBZF5YwV",
"type": "CopyNumberChange",
"location": {
"id": "ga4gh:SL.0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B",
Expand All @@ -393,7 +404,18 @@ class AmplificationToCxVarService(ServiceResponse):
"start": 140713327,
"end": 140924929,
},
"copyChange": "efo:0030072",
"copyChange": {
"primaryCode": "EFO:0030072",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030072",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"version": __version__,
Expand Down
4 changes: 2 additions & 2 deletions src/variation/schemas/gnomad_vcf_to_protein_schema.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Module for gnomad vcf to protein response schema"""

from ga4gh.core import domain_models
from ga4gh.core.models import MappableConcept

from variation.schemas.normalize_response_schema import NormalizeService


class GnomadVcfToProteinService(NormalizeService):
"""Define response for gnomad vcf to protein service"""

gene_context: domain_models.Gene | None = None
gene_context: MappableConcept | None = None
17 changes: 14 additions & 3 deletions src/variation/schemas/hgvs_to_copy_number_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class HgvsToCopyNumberChangeService(ServiceResponse):
"example": {
"hgvs_expr": "NC_000003.12:g.49531262dup",
"copy_number_change": {
"id": "ga4gh:CX.Zzws_y4cnoooQ7WXjg2B3nKIyFWXzOg3",
"digest": "Zzws_y4cnoooQ7WXjg2B3nKIyFWXzOg3",
"id": "ga4gh:CX.30bDl5yhHzjc4M5uGS_8IeYMzHQksQGh",
"digest": "30bDl5yhHzjc4M5uGS_8IeYMzHQksQGh",
"type": "CopyNumberChange",
"location": {
"id": "ga4gh:SL.2vbgFGHGB0QGODwgZNi05fWbROkkjf04",
Expand All @@ -70,7 +70,18 @@ class HgvsToCopyNumberChangeService(ServiceResponse):
"start": 49531261,
"end": 49531262,
},
"copyChange": "efo:0030069",
"copyChange": {
"primaryCode": "EFO:0030069",
"mappings": [
{
"coding": {
"system": "https://www.ebi.ac.uk/efo/",
"code": "EFO:0030069",
},
"relation": "exactMatch",
}
],
},
},
"service_meta_": {
"name": "variation-normalizer",
Expand Down
4 changes: 2 additions & 2 deletions src/variation/schemas/token_response_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Literal

from cool_seq_tool.schemas import AnnotationLayer
from ga4gh.core import domain_models
from ga4gh.core.models import MappableConcept
from pydantic import BaseModel, StrictInt, StrictStr

from variation.schemas.app_schemas import AmbiguousRegexType
Expand Down Expand Up @@ -255,4 +255,4 @@ class GeneToken(Token):

matched_value: StrictStr
token_type: Literal[TokenType.GENE] = TokenType.GENE
gene: domain_models.Gene | None = None
gene: MappableConcept | None = None
13 changes: 10 additions & 3 deletions src/variation/to_copy_number_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@
from variation.to_vrs import ToVRS
from variation.tokenize import Tokenize
from variation.translate import Translate
from variation.utils import get_priority_sequence_location, get_vrs_loc_seq
from variation.utils import (
get_copy_change_concept,
get_priority_sequence_location,
get_vrs_loc_seq,
)
from variation.validate import Validate

VALID_CLASSIFICATION_TYPES = [
Expand Down Expand Up @@ -603,7 +607,8 @@ def parsed_to_copy_number(
else:
if is_cx:
variation = models.CopyNumberChange(
location=seq_loc, copyChange=request_body.copy_change
location=seq_loc,
copyChange=get_copy_change_concept(request_body.copy_change),
)
variation.id = ga4gh_identify(variation)
else:
Expand Down Expand Up @@ -715,7 +720,9 @@ def amplification_to_cx_var(
vrs_location.id = ga4gh_identify(vrs_location)
vrs_cx = models.CopyNumberChange(
location=vrs_location,
copyChange=models.CopyChange.EFO_0030072.value,
copyChange=get_copy_change_concept(
models.CopyChange.EFO_0030072
),
)
vrs_cx.id = ga4gh_identify(vrs_cx)
variation = models.CopyNumberChange(
Expand Down
4 changes: 2 additions & 2 deletions src/variation/translators/amplification.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from variation.schemas.translation_response_schema import TranslationResult
from variation.schemas.validation_response_schema import ValidationResult
from variation.translators.translator import Translator
from variation.utils import get_priority_sequence_location
from variation.utils import get_copy_change_concept, get_priority_sequence_location


class Amplification(Translator):
Expand Down Expand Up @@ -52,7 +52,7 @@ async def translate(
if priority_seq_loc:
vrs_cx = models.CopyNumberChange(
location=models.SequenceLocation(**priority_seq_loc),
copyChange=models.CopyChange.EFO_0030072,
copyChange=get_copy_change_concept(models.CopyChange.EFO_0030072),
)
vrs_cx.id = ga4gh_identify(vrs_cx)
vrs_cx = vrs_cx.model_dump(exclude_none=True)
Expand Down
6 changes: 3 additions & 3 deletions src/variation/translators/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.schemas import AnnotationLayer, ManeGeneData, ResidueMode
from cool_seq_tool.sources import UtaDatabase
from ga4gh.core import entity_models
from ga4gh.core.models import Extension
from ga4gh.vrs import models

from variation.hgvs_dup_del_mode import HGVSDupDelMode
Expand Down Expand Up @@ -258,7 +258,7 @@ async def get_p_or_cdna_translation_result(
@staticmethod
def _mane_gene_extensions(
mane_genes: list[ManeGeneData],
) -> list[entity_models.Extension] | None:
) -> list[Extension] | None:
"""Transform mane genes to list of extensions
This is only used in Genomic translators
Expand All @@ -270,7 +270,7 @@ def _mane_gene_extensions(
mane_genes_exts = None
if mane_genes:
mane_genes_exts = [
entity_models.Extension(
Extension(
name="mane_genes",
value=mane_genes,
)
Expand Down
23 changes: 20 additions & 3 deletions src/variation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bioutils.sequences import aa3_to_aa1 as _aa3_to_aa1
from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import domain_models
from ga4gh.core.models import Coding, ConceptMapping, MappableConcept, Relation
from ga4gh.vrs import models

from variation.schemas.app_schemas import AmbiguousRegexType
Expand Down Expand Up @@ -67,12 +67,12 @@ def _get_priority_sequence_location(


def get_priority_sequence_location(
gene: domain_models.Gene, seqrepo_access: SeqRepoAccess
gene: MappableConcept, seqrepo_access: SeqRepoAccess
) -> dict | None:
"""Get prioritized sequence location from a gene
Will prioritize NCBI and then Ensembl. GRCh38 will be chosen over GRCh37.
:param gene: GA4GH Core Gene
:param gene: Mappable Concept containing gene information
:param seqrepo_access: Client to access seqrepo
:return: Prioritized sequence location represented as a dictionary if found
"""
Expand Down Expand Up @@ -237,3 +237,20 @@ def get_vrs_loc_seq(
else:
ref = None
return ref or None # get_reference_sequence can return empty str


def get_copy_change_concept(efo_code: models.CopyChange) -> MappableConcept:
    """Build the mappable concept that represents a copy change EFO code

    The EFO code is used twice: as the primary code of the returned concept and
    as the code of a single coding that is attached to the concept through an
    exactMatch mapping.

    :param efo_code: EFO code represented as a CURIE
    :return: Mappable concept for the EFO code with one exactMatch mapping
    """
    # Coding that carries the EFO code together with the EFO system URL
    efo_coding = Coding(code=efo_code, system="https://www.ebi.ac.uk/efo/")

    # The concept has exactly one mapping, relating it to the coding above
    exact_match = ConceptMapping(relation=Relation.EXACT_MATCH, coding=efo_coding)

    return MappableConcept(primaryCode=efo_code, mappings=[exact_match])
7 changes: 4 additions & 3 deletions src/variation/vrs_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
from ga4gh.core import entity_models, ga4gh_identify
from ga4gh.core import ga4gh_identify
from ga4gh.core.models import Extension
from ga4gh.vrs import models, normalize
from pydantic import ValidationError

Expand Down Expand Up @@ -97,7 +98,7 @@ def vrs_allele(
sstate: models.LiteralSequenceExpression | models.ReferenceLengthExpression,
alt_type: AltType,
errors: list[str],
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict | None:
"""Create a VRS Allele object.
Expand Down Expand Up @@ -154,7 +155,7 @@ def to_vrs_allele(
cds_start: int | None = None,
alt: str | None = None,
residue_mode: ResidueMode = ResidueMode.RESIDUE,
extensions: list[entity_models.Extension] | None = None,
extensions: list[Extension] | None = None,
) -> dict | None:
"""Translate accession and position to VRS Allele Object.
Expand Down
Loading

0 comments on commit cc47bb3

Please sign in to comment.