Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ontoportal mapping model json converter #281

Merged
Merged
2 changes: 1 addition & 1 deletion sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"obographs-json",
"json",
]
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir"]
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir", "ontoportal_json"]

SSSOM_DEFAULT_RDF_SERIALISATION = "turtle"

Expand Down
65 changes: 65 additions & 0 deletions sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from .constants import SCHEMA_YAML
from .parsers import to_mapping_set_document
from .typehints import PrefixMap
from .util import (
PREFIX_MAP_KEY,
RDF_FORMATS,
Expand Down Expand Up @@ -142,6 +143,19 @@ def write_owl(
print(t.decode(), file=file)


def write_ontoportal_json(
    msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json"
) -> None:
    """Serialise a mapping set dataframe as ontoportal mapping JSON.

    The mappings are converted with ``to_ontoportal_json`` and dumped to
    ``output`` as JSON indented by two spaces.

    :param msdf: Mapping set dataframe to export.
    :param output: Writable text stream that receives the JSON document.
    :param serialisation: Must be ``"ontoportal_json"``.
    :raises ValueError: If ``serialisation`` has any other value.
    """
    # Reject unsupported formats up front; nothing is converted or written for them.
    if serialisation != "ontoportal_json":
        raise ValueError(
            f"Unknown json format: {serialisation}, currently only ontoportal_json supported"
        )

    mappings = to_ontoportal_json(msdf)
    json.dump(mappings, output, indent=2)


# Converters
# Converters convert a mappingsetdataframe to an object of the supported types (json, pandas dataframe)

Expand Down Expand Up @@ -475,6 +489,40 @@ def to_json(msdf: MappingSetDataFrame) -> JsonObj:
return json_obj


def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]:
    """Convert a mapping set dataframe to a list of ontoportal mapping JSON objects.

    One dictionary is produced per row of ``msdf.df`` with the keys
    ``classes``, ``subject_source_id``, ``object_source_id``, ``source_name``,
    ``source_contact_info``, ``date`` and ``process``. Set-level values are
    read from ``msdf.metadata``; identifiers are expanded with
    ``msdf.prefix_map``.

    :param msdf: Mapping set dataframe to convert.
    :return: List of mapping dictionaries; empty when ``msdf.df`` is ``None``.
    """
    if msdf.df is None:
        return []

    prefix_map = msdf.prefix_map
    metadata: Dict[str, Any] = msdf.metadata if msdf.metadata is not None else {}

    def resolve(x):
        return _resolve_url(x, prefix_map)

    def resolve_source(source):
        # A missing source stays empty: resolving "" would yield a dangling ":".
        return _resolve_prefix(source, prefix_map) if source else ""

    # These values are identical for every mapping, so look them up once.
    creators = metadata.get("creator_id") or []
    # A single creator supplied as a plain string must not be joined
    # character by character.
    contact_info = creators if isinstance(creators, str) else ",".join(creators)
    source_name = metadata.get("mapping_set_id", "")
    process_name = metadata.get("mapping_set_description", "")

    m_list = []
    for _, row in msdf.df.iterrows():
        m_list.append(
            {
                "classes": [resolve(row["subject_id"]), resolve(row["object_id"])],
                "subject_source_id": resolve_source(row.get("subject_source", "")),
                "object_source_id": resolve_source(row.get("object_source", "")),
                "source_name": source_name,
                "source_contact_info": contact_info,
                # The set-level date, when present, takes precedence over the row's.
                "date": metadata.get("mapping_date", row.get("mapping_date", "")),
                "process": {
                    "name": process_name,
                    "source": resolve(row.get("mapping_justification", "")),
                    "comment": row.get("comment", ""),
                    "relation": [resolve(row["predicate_id"])],
                },
            }
        )

    return m_list


# Support methods


Expand All @@ -501,6 +549,8 @@ def get_writer_function(
return write_json, output_format
elif output_format == "fhir_json":
return write_fhir_json, output_format
elif output_format == "ontoportal_json":
return write_ontoportal_json, output_format
elif output_format == "owl":
return write_owl, SSSOM_DEFAULT_RDF_SERIALISATION
else:
Expand Down Expand Up @@ -554,3 +604,18 @@ def _get_separator(serialisation: Optional[str] = None) -> str:
f"Unknown table format: {serialisation}, should be one of tsv or csv"
)
return sep


def _resolve_url(prefixed_url_str: str, prefix_map: PrefixMap) -> str:
    """Expand a ``prefix:local`` string into a URL using ``prefix_map``.

    Empty values and strings without a colon are returned unchanged.
    Otherwise the part before the first colon is resolved with
    ``_resolve_prefix`` and the remainder is appended to the result.

    :param prefixed_url_str: CURIE-like string to expand.
    :param prefix_map: Mapping from prefix to URL expansion.
    :return: The expanded string, or the input when there is nothing to expand.
    """
    if not prefixed_url_str:
        return prefixed_url_str

    # Split on the first colon only: the local part may itself contain
    # colons, and such identifiers were previously left unexpanded.
    prefix, separator, local_id = prefixed_url_str.partition(":")
    if not separator:
        return prefixed_url_str
    return _resolve_prefix(prefix, prefix_map) + local_id
hrshdhgd marked this conversation as resolved.
Show resolved Hide resolved


def _resolve_prefix(prefix_str, prefix_map: PrefixMap) -> str:
    """Return the expansion registered for ``prefix_str`` in ``prefix_map``.

    When the prefix is not registered, the prefix followed by a colon is
    returned instead.
    """
    unresolved = prefix_str + ":"
    return prefix_map.get(prefix_str, unresolved)
19 changes: 18 additions & 1 deletion tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import unittest
from typing import Dict

from rdflib import Graph

Expand All @@ -18,7 +19,7 @@
write_json,
write_owl,
write_rdf,
write_table,
write_table, to_ontoportal_json,
)

from .test_data import SSSOMTestCase, get_all_test_cases
Expand Down Expand Up @@ -51,6 +52,8 @@ def test_conversion(self):
logging.info("Testing JSON export")
self._test_to_json_dict(mdoc, test)
self._test_to_json(mdoc, test)
logging.info("Testing ontoportal JSON export")
self._test_to_ontoportal_json(mdoc,test)

def _test_to_owl_graph(self, mdoc, test):
msdf = to_mapping_set_dataframe(mdoc)
Expand All @@ -74,6 +77,20 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase):
with open(test.get_out_file("json"), "w") as file:
write_json(msdf, file, serialisation="json")

def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase):
    """Check the ontoportal JSON export of one test case.

    Verifies that one object is produced per expected dataframe row and
    that the first object carries two classes and a non-empty relation list.
    """
    mappings = to_ontoportal_json(to_mapping_set_dataframe(mdoc))
    self.assertEqual(len(mappings), test.ct_data_frame_rows)

    # Only the first mapping is inspected in detail.
    first_mapping: Dict = mappings[0]
    self.assertIn("classes", first_mapping)
    self.assertEqual(len(first_mapping.get("classes")), 2)
    self.assertIn("process", first_mapping)

    process: Dict = first_mapping.get("process")
    self.assertIn("relation", process)
    relation = process.get("relation")
    self.assertIsInstance(relation, list)
    self.assertGreater(len(relation), 0)

def _test_to_rdf_graph(self, mdoc, test):
msdf = to_mapping_set_dataframe(mdoc)
g = to_rdf_graph(msdf)
Expand Down
17 changes: 16 additions & 1 deletion tests/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from jsonasobj2 import JsonObj

from sssom.parsers import parse_sssom_json, parse_sssom_rdf, parse_sssom_table
from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table
from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table, write_ontoportal_json
from tests.constants import data_dir as test_data_dir
from tests.constants import test_out_dir

Expand Down Expand Up @@ -84,3 +84,18 @@ def test_write_sssom_owl(self):
# FIXME this test doesn't test anything
# TODO implement "read_owl" function
self.assertEqual(1, 1)

def test_write_sssom_ontoportal_json(self):
    """Test writing as ontoportal JSON.

    Writes ``self.msdf`` to a file in the test output directory, reads the
    file back and checks that it holds one JSON object per expected mapping.
    """
    path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json")
    # Use an explicit encoding so the round trip does not depend on the
    # platform default.
    with open(path, "w", encoding="utf-8") as file:
        write_ontoportal_json(self.msdf, file)

    with open(path, "r", encoding="utf-8") as file:
        mappings = json.load(file)

    # The writer emits a top-level JSON array of mapping objects.
    self.assertIsInstance(mappings, list)
    self.assertEqual(
        len(mappings),
        self.mapping_count,
        f"{path} has the wrong number of mappings.",
    )