Skip to content

Commit

Permalink
Add the ontoportal mapping model json converter (#281)
Browse files Browse the repository at this point in the history
* add conversion to ontoportal json tests

* add the to_ontoportal_json converter

* add the write_sssom_ontoportal_json test

* add the write_ontoportal_json function

* add the ontoportal writer to the list of writers of the CLI

* change the serialisation parameter to use "ontoportal_json"

* fix linters issues

* add "ontoportal_json" to SSSOM_EXPORT_FORMATS array

* remove the process section from the ontoportal format

* auto lint code

* update the test for the new ontoportal mappings format
  • Loading branch information
syphax-bouazzouni authored Mar 21, 2023
1 parent 59202d8 commit cc22dab
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 2 deletions.
2 changes: 1 addition & 1 deletion sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
"obographs-json",
"json",
]
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir"]
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir", "ontoportal_json"]

SSSOM_DEFAULT_RDF_SERIALISATION = "turtle"

Expand Down
63 changes: 63 additions & 0 deletions sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from .constants import SCHEMA_YAML
from .parsers import to_mapping_set_document
from .typehints import PrefixMap
from .util import (
PREFIX_MAP_KEY,
RDF_FORMATS,
Expand Down Expand Up @@ -147,6 +148,19 @@ def write_owl(
print(t.decode(), file=file)


def write_ontoportal_json(
    msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json"
) -> None:
    """Serialise a mapping set dataframe as ontoportal mapping JSON.

    :param msdf: The mapping set dataframe to convert.
    :param output: Open text handle that receives the JSON document.
    :param serialisation: Requested format; only "ontoportal_json" is accepted.
    :raises ValueError: If ``serialisation`` is anything other than "ontoportal_json".
    """
    # Reject unsupported formats first; nothing is written to ``output`` for them.
    if serialisation != "ontoportal_json":
        raise ValueError(
            f"Unknown json format: {serialisation}, currently only ontoportal_json supported"
        )

    json.dump(to_ontoportal_json(msdf), output, indent=2)


# Converters
# Converters convert a mappingsetdataframe to an object of the supported types (json, pandas dataframe)

Expand Down Expand Up @@ -480,6 +494,38 @@ def to_json(msdf: MappingSetDataFrame) -> JsonObj:
return json_obj


def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]:
    """Convert a mapping set dataframe to a list of ontoportal mapping JSON objects.

    Every row of ``msdf.df`` yields one dictionary. ``subject_id``, ``object_id``,
    ``predicate_id`` and ``mapping_justification`` are passed through
    ``_resolve_url`` with the mapping set's prefix map; the set-level metadata
    supplies ``source_name``, ``source_contact_info``, ``name`` and, when present,
    ``date``.

    :param msdf: Mapping set dataframe providing ``df``, ``prefix_map`` and ``metadata``.
    :return: One dictionary per row, or an empty list when ``msdf.df`` is ``None``.
    :raises KeyError: If a row lacks ``subject_id``, ``object_id`` or ``predicate_id``.
    """
    if msdf.df is None:
        return []

    prefix_map = msdf.prefix_map
    metadata: Dict[str, Any] = msdf.metadata if msdf.metadata is not None else {}

    # ``creator_id`` may be a single string or a list of strings. Joining a bare
    # string would put a comma between every character, so wrap it first.
    creators = metadata.get("creator_id", "")
    if isinstance(creators, str):
        creators = [creators] if creators else []

    # These values come from the set-level metadata only, so compute them once.
    source_name = metadata.get("mapping_set_id", "")
    source_contact_info = ",".join(creators)
    name = metadata.get("mapping_set_description", "")

    def resolve(x):
        return _resolve_url(x, prefix_map)

    def resolve_source(source):
        # A missing source stays empty instead of being turned into a lone ":".
        return _resolve_prefix(source, prefix_map) if source else ""

    m_list = []
    for _, row in msdf.df.iterrows():
        json_obj = {
            "classes": [resolve(row["subject_id"]), resolve(row["object_id"])],
            "subject_source_id": resolve_source(row.get("subject_source", "")),
            "object_source_id": resolve_source(row.get("object_source", "")),
            "source_name": source_name,
            "source_contact_info": source_contact_info,
            # The set-level date wins; the row-level date is only a fallback.
            "date": metadata.get("mapping_date", row.get("mapping_date", "")),
            "name": name,
            "source": resolve(row.get("mapping_justification", "")),
            "comment": row.get("comment", ""),
            "relation": [resolve(row["predicate_id"])],
        }
        m_list.append(json_obj)

    return m_list


# Support methods


Expand All @@ -506,6 +552,8 @@ def get_writer_function(
return write_json, output_format
elif output_format == "fhir_json":
return write_fhir_json, output_format
elif output_format == "ontoportal_json":
return write_ontoportal_json, output_format
elif output_format == "owl":
return write_owl, SSSOM_DEFAULT_RDF_SERIALISATION
else:
Expand Down Expand Up @@ -559,3 +607,18 @@ def _get_separator(serialisation: Optional[str] = None) -> str:
f"Unknown table format: {serialisation}, should be one of tsv or csv"
)
return sep


def _resolve_url(prefixed_url_str: str, prefix_map: PrefixMap) -> str:
if not prefixed_url_str:
return prefixed_url_str

prefix_url = prefixed_url_str.split(":")
if len(prefix_url) != 2:
return prefixed_url_str
else:
return _resolve_prefix(prefix_url[0], prefix_map) + prefix_url[1]


def _resolve_prefix(prefix_str, prefix_map: PrefixMap) -> str:
return prefix_map.get(prefix_str, prefix_str + ":")
15 changes: 15 additions & 0 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import unittest
from typing import Dict

from rdflib import Graph

Expand All @@ -13,6 +14,7 @@
from sssom.writers import (
to_dataframe,
to_json,
to_ontoportal_json,
to_owl_graph,
to_rdf_graph,
write_json,
Expand Down Expand Up @@ -51,6 +53,8 @@ def test_conversion(self):
logging.info("Testing JSON export")
self._test_to_json_dict(mdoc, test)
self._test_to_json(mdoc, test)
logging.info("Testing ontoportal JSON export")
self._test_to_ontoportal_json(mdoc, test)

def _test_to_owl_graph(self, mdoc, test):
msdf = to_mapping_set_dataframe(mdoc)
Expand All @@ -74,6 +78,17 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase):
with open(test.get_out_file("json"), "w") as file:
write_json(msdf, file, serialisation="json")

def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase):
    """Check the ontoportal JSON export of ``mdoc`` against the test case.

    The export must contain one object per expected dataframe row, and the
    first object must carry two ``classes`` and a non-empty ``relation`` list.
    """
    msdf = to_mapping_set_dataframe(mdoc)
    jsonob = to_ontoportal_json(msdf)
    self.assertEqual(len(jsonob), test.ct_data_frame_rows)
    first_ob: Dict = jsonob[0]
    # assertIn/assertEqual report the offending values on failure,
    # whereas assertTrue only reports "False is not true".
    self.assertIn("classes", first_ob)
    self.assertEqual(len(first_ob["classes"]), 2)
    self.assertIn("relation", first_ob)
    self.assertIsInstance(first_ob["relation"], list)
    self.assertGreater(len(first_ob["relation"]), 0)

def _test_to_rdf_graph(self, mdoc, test):
msdf = to_mapping_set_dataframe(mdoc)
g = to_rdf_graph(msdf)
Expand Down
24 changes: 23 additions & 1 deletion tests/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
from jsonasobj2 import JsonObj

from sssom.parsers import parse_sssom_json, parse_sssom_rdf, parse_sssom_table
from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table
from sssom.writers import (
write_fhir_json,
write_json,
write_ontoportal_json,
write_owl,
write_rdf,
write_table,
)
from tests.constants import data_dir as test_data_dir
from tests.constants import test_out_dir

Expand Down Expand Up @@ -84,3 +91,18 @@ def test_write_sssom_owl(self):
# FIXME this test doesn't test anything
# TODO implement "read_owl" function
self.assertEqual(1, 1)

def test_write_sssom_ontoportal_json(self):
    """Write the mapping set as ontoportal JSON and count the entries read back."""
    path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json")

    # Serialise the fixture to disk ...
    with open(path, "w") as sink:
        write_ontoportal_json(self.msdf, sink)

    # ... then load the file again as plain JSON.
    with open(path, "r") as source:
        written: list = json.load(source)

    failure_message = f"{path} has the wrong number of mappings."
    self.assertEqual(len(written), self.mapping_count, failure_message)

0 comments on commit cc22dab

Please sign in to comment.