Skip to content

Commit

Permalink
feat!: use preferred formats for MappableConcept.mappings
Browse files Browse the repository at this point in the history
close #383

* use preferred format (URI) for `system`, where possible
  • Loading branch information
korikuzma committed Dec 26, 2024
1 parent 7830841 commit 84c2711
Show file tree
Hide file tree
Showing 4 changed files with 353 additions and 206 deletions.
187 changes: 75 additions & 112 deletions docs/source/normalizing_data/normalization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,132 +74,95 @@ Normalized records are structured as `Genes <https://github.com/ga4gh/vrs/tree/2
"primaryCode": "hgnc:1097",
"label": "BRAF",
"mappings": [
{
"coding": {
"code": "1097",
"system": "hgnc"
},
"relation": "exactMatch"
},
{
"coding": {
"code": "673",
"system": "ncbigene"
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "ENSG00000157764",
"system": "ensembl"
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "CCDS5863",
"system": "ccds"
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "1943",
"system": "iuphar"
{
"coding": {
"code": "HGNC:1097",
"system": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/",
},
"relation": "exactMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "119066",
"system": "orphanet"
{
"coding": {
"code": "673",
"system": "https://www.ncbi.nlm.nih.gov/gene/?term=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "BRAF",
"system": "cosmic"
{
"coding": {
"code": "ENSG00000157764",
"system": "https://useast.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "2284096",
"system": "pubmed"
{
"coding": {
"code": "1943",
"system": "https://www.guidetopharmacology.org/GRAC/ObjectDisplayForward?objectId=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "uc003vwc.5",
"system": "ucsc"
{
"coding": {"code": "119066", "system": "orphanet"},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "164757",
"system": "omim"
{
"coding": {
"code": "BRAF",
"system": "https://cancer.sanger.ac.uk/cosmic/gene/analysis?ln=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "NM_004333",
"system": "refseq"
{
"coding": {
"code": "2284096",
"system": "https://pubmed.ncbi.nlm.nih.gov/",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "CCDS87555",
"system": "ccds"
{
"coding": {
"code": "uc003vwc.5",
"system": "https://genome.cse.ucsc.edu/cgi-bin/hgGene?hgg_gene=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "P15056",
"system": "uniprot"
{
"coding": {"code": "164757", "system": "https://www.omim.org/entry/"},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "M95712",
"system": "ena.embl"
{
"coding": {
"code": "NM_004333",
"system": "https://www.ncbi.nlm.nih.gov/refseq/?term=",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "OTTHUMG00000157457",
"system": "vega"
{
"coding": {
"code": "P15056",
"system": "https://www.uniprot.org/uniprotkb/",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "1565476",
"system": "pubmed"
{
"coding": {
"code": "M95712",
"system": "https://www.ebi.ac.uk/ena/browser/view/",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "CCDS94219",
"system": "ccds"
{
"coding": {"code": "OTTHUMG00000157457", "system": "vega"},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
},
{
"coding": {
"code": "CCDS94218",
"system": "ccds"
{
"coding": {
"code": "1565476",
"system": "https://pubmed.ncbi.nlm.nih.gov/",
},
"relation": "relatedMatch",
},
"relation": "relatedMatch"
}
],
"extensions": [
{
Expand Down
41 changes: 25 additions & 16 deletions src/gene/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
from gene import ITEM_TYPES, NAMESPACE_LOOKUP, PREFIX_LOOKUP, __version__
from gene.database import AbstractDatabase, DatabaseReadException
from gene.schemas import (
NAMESPACE_TO_SYSTEM_URI,
BaseGene,
BaseNormalizationService,
Gene,
GeneTypeFieldName,
MatchesNormalized,
MatchType,
NamespacePrefix,
NormalizeService,
RecordType,
RefType,
Expand Down Expand Up @@ -342,20 +344,17 @@ def _add_merged_meta(self, response: NormalizeService) -> NormalizeService:
"""
sources_meta = {}
gene = response.gene
sources = [gene.primaryCode.root.split(":")[0]]
if gene.mappings:
sources += [m.coding.system for m in gene.mappings]

sources = []
for m in gene.mappings or []:
for ns, system in NAMESPACE_TO_SYSTEM_URI.items():
if system == m.coding.system and ns.value in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns.value])

for src in sources:
try:
src_name = PREFIX_LOOKUP[src]
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
_source_meta = self.db.get_source_metadata(src_name)
sources_meta[SourceName(src_name)] = SourceMeta(**_source_meta)
if src not in sources_meta:
_source_meta = self.db.get_source_metadata(src)
sources_meta[SourceName(src)] = SourceMeta(**_source_meta)
response.source_meta_ = sources_meta
return response

Expand Down Expand Up @@ -400,17 +399,27 @@ def _add_gene(
"""

def _create_concept_mapping(
    concept_id: str, relation: Relation = Relation.RELATED_MATCH
) -> ConceptMapping:
    """Create concept mapping for identifier

    :param concept_id: Concept identifier represented as a curie
    :param relation: SKOS mapping relationship, default is relatedMatch
    :raises ValueError: If ``concept_id`` has no ``:`` separator or its prefix
        is not a supported namespace
    :return: Concept mapping for identifier
    """
    # Split on the first colon only: a reference part that itself contains
    # ":" stays intact instead of breaking two-target unpacking.
    prefix, sep, source_id = concept_id.partition(":")
    if not sep:
        err_msg = f"Concept identifier is not a curie: {concept_id}"
        raise ValueError(err_msg)

    prefix = prefix.lower()
    try:
        source = NamespacePrefix(prefix)
    except ValueError as e:
        err_msg = f"Namespace prefix not supported: {prefix}"
        raise ValueError(err_msg) from e

    # Use the URI registered for this namespace as the coding system; when
    # none is registered, fall back to the namespace prefix itself.
    system = NAMESPACE_TO_SYSTEM_URI.get(source, source)
    # HGNC codes carry the full upper-cased curie (e.g. "HGNC:1097"); every
    # other source uses only the part after the prefix.
    code_ = concept_id.upper() if source == NamespacePrefix.HGNC else source_id

    return ConceptMapping(
        coding=Coding(code=code(code_), system=system), relation=relation
    )

gene_obj = MappableConcept(
Expand Down
Loading

0 comments on commit 84c2711

Please sign in to comment.