Skip to content

Commit

Permalink
wip: fix coding
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jan 16, 2025
1 parent 82b75d3 commit 3fcf5a6
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 90 deletions.
30 changes: 26 additions & 4 deletions docs/source/normalizing_data/normalization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,81 +83,103 @@ Normalized records are structured as `Genes <https://github.com/ga4gh/vrs/tree/2
},
{
"coding": {
"id": "ncbigene:673",
"code": "673",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "ensembl:ENSG00000157764",
"code": "ENSG00000157764",
"system": "https://www.ensembl.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "iuphar:1943",
"code": "1943",
"system": "https://www.guidetopharmacology.org",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "119066", "system": "orphanet"},
"coding": {
"id": "orphanet:119066",
"code": "119066",
"system": "https://www.orpha.net",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "cosmic:BRAF",
"code": "BRAF",
"system": "https://cancer.sanger.ac.uk/cosmic",
"system": "https://cancer.sanger.ac.uk/cosmic/",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "pubmed:2284096",
"code": "2284096",
"system": "https://pubmed.ncbi.nlm.nih.gov",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "ucsc:uc003vwc.5",
"code": "uc003vwc.5",
"system": "https://genome.ucsc.edu",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "164757", "system": "https://www.omim.org"},
"coding": {
"id": "omim:164757",
"code": "164757",
"system": "https://www.omim.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "refseq:NM_004333",
"code": "NM_004333",
"system": "https://www.ncbi.nlm.nih.gov/refseq/",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "uniprot:P15056",
"code": "P15056",
"system": "https://www.uniprot.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "ena.embl:M95712",
"code": "M95712",
"system": "https://www.ebi.ac.uk/ena/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "OTTHUMG00000157457", "system": "vega"},
"coding": {
"id": "vega:OTTHUMG00000157457",
"code": "OTTHUMG00000157457",
"system": "https://www.sanger.ac.uk/tool/vega-genome-browser/",
},
"relation": "relatedMatch",
},
{
"coding": {
"id": "pubmed:1565476",
"code": "1565476",
"system": "https://pubmed.ncbi.nlm.nih.gov",
},
Expand Down
18 changes: 11 additions & 7 deletions src/gene/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from gene.database import AbstractDatabase, DatabaseReadException
from gene.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
BaseGene,
BaseNormalizationService,
Gene,
Expand Down Expand Up @@ -348,7 +347,7 @@ def _add_merged_meta(self, response: NormalizeService) -> NormalizeService:

sources = []
for m in gene.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
ns = m.coding.id.split(":")[0]
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

Expand Down Expand Up @@ -406,27 +405,32 @@ def _create_concept_mapping(
) -> ConceptMapping:
"""Create concept mapping for identifier
``system`` will use source homepage or namespace prefix, in that order of \
preference, if available.
``system`` is set to the homepage URI of the concept ID's source.
:param concept_id: A lowercase concept identifier represented as a curie
:param relation: SKOS mapping relationship, default is relatedMatch
:raises ValueError: If source of concept ID is not a valid
``NamespacePrefix``
:return: Concept mapping for identifier
"""
source = concept_id.split(":")[0]
source, source_code = concept_id.split(":")

try:
source = NamespacePrefix(source)
except ValueError as e:
err_msg = f"Namespace prefix not supported: {source}"
raise ValueError(err_msg) from e

system = NAMESPACE_TO_SYSTEM_URI.get(source, source)
if source == NamespacePrefix.HGNC:
source_code = concept_id.upper()

return ConceptMapping(
coding=Coding(code=code(concept_id), system=system), relation=relation
coding=Coding(
id=concept_id,
code=code(source_code),
system=NAMESPACE_TO_SYSTEM_URI[source],
),
relation=relation,
)

gene_obj = MappableConcept(
Expand Down
64 changes: 42 additions & 22 deletions src/gene/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ class NamespacePrefix(Enum):
HORDE = "hordedb"
MEROPS = "merops"
IUPHAR = "iuphar"
KZNF = "knzfgc"
MAMIT = "mamittrnadb"
CD = "hcdmdb"
LNCRNADB = "lncrnadb"
Expand All @@ -193,18 +192,22 @@ class NamespacePrefix(Enum):
NamespacePrefix.PUBMED: "https://pubmed.ncbi.nlm.nih.gov",
NamespacePrefix.COSMIC: "https://cancer.sanger.ac.uk/cosmic/",
NamespacePrefix.OMIM: "https://www.omim.org",
NamespacePrefix.MIRBASE: "https://www.mirbase.org",
NamespacePrefix.HOMEODB: "http://homeodb.zoo.ox.ac.uk",
NamespacePrefix.SNORNABASE: "https://www-snorna.biotoul.fr",
NamespacePrefix.ORPHANET: "https://www.orpha.net",
NamespacePrefix.PSEUDOGENE: "http://pseudogene.org",
NamespacePrefix.HORDE: "https://genome.weizmann.ac.il/horde/",
NamespacePrefix.MEROPS: "https://www.ebi.ac.uk/merops/",
NamespacePrefix.IUPHAR: "https://www.guidetopharmacology.org",
NamespacePrefix.MAMIT: "http://mamit-trna.u-strasbg.fr",
NamespacePrefix.CD: "http://www.hcdm.org",
NamespacePrefix.IMGT: "https://www.imgt.org",
NamespacePrefix.IMGT_GENE_DB: "https://www.imgt.org",
NamespacePrefix.LNCRNADB: "https://rnacentral.org",
NamespacePrefix.RFAM: "https://rfam.org",
}

# URI to source
SYSTEM_URI_TO_NAMESPACE = {
system_uri: ns.value for ns, system_uri in NAMESPACE_TO_SYSTEM_URI.items()
}


class DataLicenseAttributes(BaseModel):
"""Define constraints for data license attributes."""
Expand Down Expand Up @@ -347,95 +350,112 @@ class NormalizeService(BaseNormalizationService):
"mappings": [
{
"coding": {
"code": "hgnc:1097",
"id": "hgnc:1097",
"code": "HGNC:1097",
"system": "https://www.genenames.org",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "ncbigene:673",
"id": "ncbigene:673",
"code": "673",
"system": "https://www.ncbi.nlm.nih.gov/gene/",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "ensembl:ENSG00000157764",
"id": "ensembl:ENSG00000157764",
"code": "ENSG00000157764",
"system": "https://www.ensembl.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "iuphar:1943",
"id": "iuphar:1943",
"code": "1943",
"system": "https://www.guidetopharmacology.org",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "orphanet:119066", "system": "orphanet"},
"coding": {
"id": "orphanet:119066",
"code": "119066",
"system": "https://www.orpha.net",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "cosmic:BRAF",
"system": "https://cancer.sanger.ac.uk/cosmic",
"id": "cosmic:BRAF",
"code": "BRAF",
"system": "https://cancer.sanger.ac.uk/cosmic/",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "pubmed:2284096",
"id": "pubmed:2284096",
"code": "2284096",
"system": "https://pubmed.ncbi.nlm.nih.gov",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "ucsc:uc003vwc.5",
"id": "ucsc:uc003vwc.5",
"code": "uc003vwc.5",
"system": "https://genome.ucsc.edu",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "omim:164757",
"id": "omim:164757",
"code": "164757",
"system": "https://www.omim.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "refseq:NM_004333",
"id": "refseq:NM_004333",
"code": "NM_004333",
"system": "https://www.ncbi.nlm.nih.gov/refseq/",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "uniprot:P15056",
"id": "uniprot:P15056",
"code": "P15056",
"system": "https://www.uniprot.org",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "ena.embl:M95712",
"id": "ena.embl:M95712",
"code": "M95712",
"system": "https://www.ebi.ac.uk/ena/",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "vega:OTTHUMG00000157457",
"system": "vega",
"id": "vega:OTTHUMG00000157457",
"code": "OTTHUMG00000157457",
"system": "https://www.sanger.ac.uk/tool/vega-genome-browser/",
},
"relation": "relatedMatch",
},
{
"coding": {
"code": "pubmed:1565476",
"id": "pubmed:1565476",
"code": "1565476",
"system": "https://pubmed.ncbi.nlm.nih.gov",
},
"relation": "relatedMatch",
Expand Down
Loading

0 comments on commit 3fcf5a6

Please sign in to comment.