def write_ontoportal_json(
    msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json"
) -> None:
    """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.

    This is the writer returned by ``get_writer_function`` for the
    ``"ontoportal_json"`` output format.

    :param msdf: Mapping set to export.
    :param output: Open text stream that receives the JSON document.
    :param serialisation: Must be ``"ontoportal_json"``, the only supported value.
    :raises ValueError: If ``serialisation`` is anything else.
    """
    # Validate first so an unsupported format fails before any conversion work.
    if serialisation != "ontoportal_json":
        raise ValueError(
            f"Unknown json format: {serialisation}, currently only ontoportal_json supported"
        )
    json.dump(to_ontoportal_json(msdf), output, indent=2)


def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]:
    """Convert a mapping set dataframe to a list of ontoportal mapping JSON objects.

    One dictionary is produced per row of ``msdf.df`` with the keys ``classes``,
    ``subject_source_id``, ``object_source_id``, ``source_name``,
    ``source_contact_info``, ``date``, ``name``, ``source``, ``comment`` and
    ``relation``. CURIEs are expanded with ``msdf.prefix_map``; optional values
    that are absent or missing (``None``/NaN) are emitted as ``""``.

    :param msdf: Mapping set to convert.
    :return: The list of mapping dictionaries; empty if ``msdf.df`` is ``None``.
    :raises KeyError: If a row lacks ``subject_id``, ``object_id`` or ``predicate_id``.
    """
    if msdf.df is None:
        return []

    prefix_map = msdf.prefix_map
    metadata: Dict[str, Any] = msdf.metadata if msdf.metadata is not None else {}

    def text(value: Any) -> str:
        """Return an optional value as a string, mapping None/NaN to ""."""
        # NaN is the only float that differs from itself; it is not valid JSON.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value if isinstance(value, str) else str(value)

    def expand(curie: str) -> str:
        return _resolve_url(curie, prefix_map)

    def source_id(value: Any) -> str:
        """Resolve an optional source prefix, keeping "" for a missing one."""
        prefix = text(value)
        # Without this guard an absent source would be rendered as a bare ":".
        return _resolve_prefix(prefix, prefix_map) if prefix else ""

    # Everything derived from the metadata is identical for every row.
    creators = metadata.get("creator_id") or ()
    if isinstance(creators, str):
        # Joining a plain string would insert a comma between its characters.
        contact_info = creators
    else:
        contact_info = ",".join(str(creator) for creator in creators)
    source_name = text(metadata.get("mapping_set_id", ""))
    set_description = text(metadata.get("mapping_set_description", ""))
    set_date = text(metadata.get("mapping_date"))

    mappings: List[Dict[str, Any]] = []
    for _, row in msdf.df.iterrows():
        mappings.append(
            {
                "classes": [expand(row["subject_id"]), expand(row["object_id"])],
                "subject_source_id": source_id(row.get("subject_source", "")),
                "object_source_id": source_id(row.get("object_source", "")),
                "source_name": source_name,
                "source_contact_info": contact_info,
                # The set-level date wins; fall back to the row's own date.
                "date": set_date or text(row.get("mapping_date", "")),
                "name": set_description,
                "source": expand(text(row.get("mapping_justification", ""))),
                "comment": text(row.get("comment", "")),
                "relation": [expand(row["predicate_id"])],
            }
        )
    return mappings


def _resolve_url(prefixed_url_str: str, prefix_map: PrefixMap) -> str:
    """Expand a CURIE to a full URL using ``prefix_map``.

    The string is split at the first colon only, so a local identifier that
    itself contains colons is preserved. Empty input and strings without a
    colon are returned unchanged.

    :param prefixed_url_str: CURIE (``prefix:local_id``) to expand.
    :param prefix_map: Mapping from prefix to URL prefix.
    :return: The expanded URL, or the input when there is nothing to expand.
    """
    if not prefixed_url_str:
        return prefixed_url_str

    prefix, separator, local_id = prefixed_url_str.partition(":")
    if not separator:
        return prefixed_url_str
    return _resolve_prefix(prefix, prefix_map) + local_id


def _resolve_prefix(prefix_str, prefix_map: PrefixMap) -> str:
    """Return the URL prefix registered for ``prefix_str``.

    An unknown prefix is returned as ``prefix_str + ":"`` so that appending the
    local identifier reproduces the original CURIE.
    """
    return prefix_map.get(prefix_str, prefix_str + ":")
def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase):
    """Check that the ontoportal JSON export yields one well-formed object per row.

    The first exported object must carry exactly two ``classes`` and a
    non-empty ``relation`` list.
    """
    msdf = to_mapping_set_dataframe(mdoc)
    jsonob = to_ontoportal_json(msdf)
    self.assertEqual(len(jsonob), test.ct_data_frame_rows)
    first_ob: Dict = jsonob[0]
    # assertIn/assertEqual report the offending values when they fail,
    # unlike assertTrue on a pre-computed boolean.
    self.assertIn("classes", first_ob)
    self.assertEqual(len(first_ob["classes"]), 2)
    self.assertIn("relation", first_ob)
    self.assertIsInstance(first_ob["relation"], list)
    self.assertGreater(len(first_ob["relation"]), 0)


def test_write_sssom_ontoportal_json(self):
    """Test writing as ontoportal JSON."""
    path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json")
    with open(path, "w") as file:
        write_ontoportal_json(self.msdf, file)

    # Read the file back to confirm it is valid JSON with one entry per mapping.
    with open(path, "r") as file:
        d: list = json.load(file)

    self.assertEqual(
        len(d),
        self.mapping_count,
        f"{path} has the wrong number of mappings.",
    )