From 19387a56669db9681e11afcac621d77b9fcd1da8 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 18:02:20 +0200 Subject: [PATCH 01/11] add conversion to ontoportal json tests --- tests/test_conversion.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 86b58554..297c8eda 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -4,6 +4,7 @@ import json import logging import unittest +from typing import Dict from rdflib import Graph @@ -18,7 +19,7 @@ write_json, write_owl, write_rdf, - write_table, + write_table, to_ontoportal_json, ) from .test_data import SSSOMTestCase, get_all_test_cases @@ -51,6 +52,8 @@ def test_conversion(self): logging.info("Testing JSON export") self._test_to_json_dict(mdoc, test) self._test_to_json(mdoc, test) + logging.info("Testing ontoportal JSON export") + self._test_to_ontoportal_json(mdoc,test) def _test_to_owl_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc) @@ -74,6 +77,20 @@ def _test_to_json(self, mdoc, test: SSSOMTestCase): with open(test.get_out_file("json"), "w") as file: write_json(msdf, file, serialisation="json") + def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase): + msdf = to_mapping_set_dataframe(mdoc) + jsonob = to_ontoportal_json(msdf) + self.assertEqual(len(jsonob), test.ct_data_frame_rows) + first_ob: Dict = jsonob[0] + self.assertTrue("classes" in first_ob) + self.assertTrue(len(first_ob.get("classes")) == 2) + self.assertTrue("process" in first_ob) + first_process_ob: Dict = first_ob.get("process") + + self.assertTrue("relation" in first_process_ob) + self.assertIsInstance(first_process_ob.get("relation"), list) + self.assertGreater(len(first_process_ob.get("relation")), 0) + def _test_to_rdf_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc) g = to_rdf_graph(msdf) From a1db9060195adaf5909db98ced3b8b8d686c99d0 Mon Sep 17 00:00:00 2001 From: Syphax 
Bouazzouni Date: Mon, 4 Jul 2022 18:08:34 +0200 Subject: [PATCH 02/11] add the to_ontoportal_json converter --- sssom/writers.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/sssom/writers.py b/sssom/writers.py index 35bcf9ba..ec06e5af 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -18,6 +18,7 @@ from .constants import SCHEMA_YAML from .parsers import to_mapping_set_document +from .typehints import PrefixMap from .util import ( PREFIX_MAP_KEY, RDF_FORMATS, @@ -474,6 +475,30 @@ def to_json(msdf: MappingSetDataFrame) -> JsonObj: json_obj = json.loads(data) return json_obj +def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]: + """Convert a mapping set dataframe to a list of ontoportal mapping JSON nbjects.""" + prefix_map = msdf.prefix_map + metadata: Dict[str, Any] = msdf.metadata if msdf.metadata is not None else {} + mList = [] + resolve = lambda x: _resolve_url(x, prefix_map) + for row_index, row in msdf.df.iterrows(): + json_obj = { + "classes": [resolve(row["subject_id"]), resolve(row["object_id"])], + "subject_source_id": _resolve_prefix(row.get("subject_source", ""), prefix_map), + "object_source_id": _resolve_prefix(row.get("object_source", ""), prefix_map), + "source_name": metadata.get("mapping_set_id", ""), + "source_contact_info": ','.join(metadata.get("creator_id", "")), + "date": metadata.get('mapping_date', row.get("mapping_date", "")), + "process": { + "name": metadata.get("mapping_set_description", ""), + "source": resolve(row.get("mapping_justification", "")), + "comment": row.get("comment", ""), + "relation": [resolve(row["predicate_id"])], + } + } + mList.append(json_obj) + + return mList # Support methods @@ -554,3 +579,17 @@ def _get_separator(serialisation: Optional[str] = None) -> str: f"Unknown table format: {serialisation}, should be one of tsv or csv" ) return sep + +def _resolve_url(prefixed_url_str: str, prefix_map: PrefixMap) -> str: + if not prefixed_url_str: + return 
prefixed_url_str + + prefix_url = prefixed_url_str.split(":") + if len(prefix_url) != 2: + return prefixed_url_str + else: + return _resolve_prefix(prefix_url[0], prefix_map) + prefix_url[1] + + +def _resolve_prefix(prefix_str, prefix_map: PrefixMap) -> str: + return prefix_map.get(prefix_str, prefix_str + ":") From ae8787bf2621df22f52daf7854c30d8d8ab7a6a8 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 18:32:37 +0200 Subject: [PATCH 03/11] add the write_sssom_ontoportal_json test --- tests/test_writers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_writers.py b/tests/test_writers.py index 43051f80..527b4c81 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -6,7 +6,7 @@ from jsonasobj2 import JsonObj from sssom.parsers import parse_sssom_json, parse_sssom_rdf, parse_sssom_table -from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table +from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table, write_ontoportal_json from tests.constants import data_dir as test_data_dir from tests.constants import test_out_dir @@ -84,3 +84,18 @@ def test_write_sssom_owl(self): # FIXME this test doesn't test anything # TODO implement "read_owl" function self.assertEqual(1, 1) + + def test_write_sssom_ontoportal_json(self): + """Test writing as ontoportal JSON.""" + path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json") + with open(path, "w") as file: + write_ontoportal_json(self.msdf, file) + + with open(path, "r") as file: + d: list = json.load(file) + + self.assertEqual( + len(d), + self.mapping_count, + f"{path} has the wrong number of mappings.", + ) \ No newline at end of file From afe648f0fb446363ff4d765e15c733649dd5cf89 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 18:34:44 +0200 Subject: [PATCH 04/11] add the write_ontoportal_json function --- sssom/writers.py | 10 ++++++++++ 1 file 
changed, 10 insertions(+) diff --git a/sssom/writers.py b/sssom/writers.py index ec06e5af..d3f53ef8 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -143,6 +143,16 @@ def write_owl( print(t.decode(), file=file) +def write_ontoportal_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None: + if serialisation == "json": + data = to_ontoportal_json(msdf) + json.dump(data, output, indent=2) + else: + raise ValueError( + f"Unknown json format: {serialisation}, currently only json supported" + ) + + # Converters # Converters convert a mappingsetdataframe to an object of the supportes types (json, pandas dataframe) From 6321bc68cdfaf071036e847a4d6542eea49e523f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 18:46:13 +0200 Subject: [PATCH 05/11] add the ontoportal writer to the list of writers of the CLI --- sssom/writers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sssom/writers.py b/sssom/writers.py index d3f53ef8..a7b2c052 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -536,6 +536,8 @@ def get_writer_function( return write_json, output_format elif output_format == "fhir_json": return write_fhir_json, output_format + elif output_format == "ontoportal_json": + return write_ontoportal_json, output_format elif output_format == "owl": return write_owl, SSSOM_DEFAULT_RDF_SERIALISATION else: From dd38bcaa925def4dc4096b0b5b845c4d70e82fc9 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 19:20:06 +0200 Subject: [PATCH 06/11] change the serialisation parameter to use "ontoportal_json" --- sssom/writers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sssom/writers.py b/sssom/writers.py index a7b2c052..40044f51 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -143,13 +143,16 @@ def write_owl( print(t.decode(), file=file) -def write_ontoportal_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None: - if serialisation == "json": +def 
write_ontoportal_json( + msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json" +) -> None: + """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.""" + if serialisation == "ontoportal_json": data = to_ontoportal_json(msdf) json.dump(data, output, indent=2) else: raise ValueError( - f"Unknown json format: {serialisation}, currently only json supported" + f"Unknown json format: {serialisation}, currently only ontoportal_json supported" ) From 74c6bec35cb8582af1bfeb372527bac1ae795186 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 19:21:15 +0200 Subject: [PATCH 07/11] fix linters issues --- sssom/writers.py | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/sssom/writers.py b/sssom/writers.py index 40044f51..9c0fb0bc 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -488,30 +488,40 @@ def to_json(msdf: MappingSetDataFrame) -> JsonObj: json_obj = json.loads(data) return json_obj + def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]: """Convert a mapping set dataframe to a list of ontoportal mapping JSON nbjects.""" prefix_map = msdf.prefix_map metadata: Dict[str, Any] = msdf.metadata if msdf.metadata is not None else {} - mList = [] - resolve = lambda x: _resolve_url(x, prefix_map) - for row_index, row in msdf.df.iterrows(): - json_obj = { - "classes": [resolve(row["subject_id"]), resolve(row["object_id"])], - "subject_source_id": _resolve_prefix(row.get("subject_source", ""), prefix_map), - "object_source_id": _resolve_prefix(row.get("object_source", ""), prefix_map), - "source_name": metadata.get("mapping_set_id", ""), - "source_contact_info": ','.join(metadata.get("creator_id", "")), - "date": metadata.get('mapping_date', row.get("mapping_date", "")), - "process": { - "name": metadata.get("mapping_set_description", ""), - "source": resolve(row.get("mapping_justification", "")), - "comment": row.get("comment", 
""), - "relation": [resolve(row["predicate_id"])], + m_list = [] + + def resolve(x): + return _resolve_url(x, prefix_map) + + if msdf.df is not None: + for _, row in msdf.df.iterrows(): + json_obj = { + "classes": [resolve(row["subject_id"]), resolve(row["object_id"])], + "subject_source_id": _resolve_prefix( + row.get("subject_source", ""), prefix_map + ), + "object_source_id": _resolve_prefix( + row.get("object_source", ""), prefix_map + ), + "source_name": metadata.get("mapping_set_id", ""), + "source_contact_info": ",".join(metadata.get("creator_id", "")), + "date": metadata.get("mapping_date", row.get("mapping_date", "")), + "process": { + "name": metadata.get("mapping_set_description", ""), + "source": resolve(row.get("mapping_justification", "")), + "comment": row.get("comment", ""), + "relation": [resolve(row["predicate_id"])], + }, } - } - mList.append(json_obj) + m_list.append(json_obj) + + return m_list - return mList # Support methods @@ -595,6 +605,7 @@ def _get_separator(serialisation: Optional[str] = None) -> str: ) return sep + def _resolve_url(prefixed_url_str: str, prefix_map: PrefixMap) -> str: if not prefixed_url_str: return prefixed_url_str From 74be1f1cdb5fa68d95628a5681ed331d2353dbc5 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 4 Jul 2022 19:21:58 +0200 Subject: [PATCH 08/11] add "ontoportal_json" to SSSOM_EXPORT_FORMATS array --- sssom/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sssom/util.py b/sssom/util.py index 8906703f..8bed5b2d 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -76,7 +76,7 @@ "obographs-json", "json", ] -SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir"] +SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json", "fhir", "ontoportal_json"] SSSOM_DEFAULT_RDF_SERIALISATION = "turtle" From 9bf303758ee2111e26edf82a8e4725aba51019a5 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 20 Mar 2023 10:15:13 +0100 Subject: [PATCH 09/11] remove the process section from the
ontoportal format --- sssom/writers.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/sssom/writers.py b/sssom/writers.py index f91bcfb0..ca3cb9d4 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -51,11 +51,11 @@ def write_table( - msdf: MappingSetDataFrame, - file: TextIO, - embedded_mode: bool = True, - serialisation="tsv", - sort=False, + msdf: MappingSetDataFrame, + file: TextIO, + embedded_mode: bool = True, + serialisation="tsv", + sort=False, ) -> None: """Write a mapping set dataframe to the file as a table.""" if msdf.df is None: @@ -90,9 +90,9 @@ def write_table( def write_rdf( - msdf: MappingSetDataFrame, - file: TextIO, - serialisation: Optional[str] = None, + msdf: MappingSetDataFrame, + file: TextIO, + serialisation: Optional[str] = None, ) -> None: """Write a mapping set dataframe to the file as RDF.""" if serialisation is None: @@ -112,7 +112,7 @@ def write_rdf( # todo: not sure the need for serialization param here; seems superfluous for some of these funcs def write_fhir_json( - msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir" + msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir" ) -> None: """Write a mapping set dataframe to the file as FHIR ConceptMap JSON.""" data = to_fhir_json(msdf) @@ -131,9 +131,9 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") def write_owl( - msdf: MappingSetDataFrame, - file: TextIO, - serialisation=SSSOM_DEFAULT_RDF_SERIALISATION, + msdf: MappingSetDataFrame, + file: TextIO, + serialisation=SSSOM_DEFAULT_RDF_SERIALISATION, ) -> None: """Write a mapping set dataframe to the file as OWL.""" if serialisation not in RDF_FORMATS: @@ -149,7 +149,7 @@ def write_owl( def write_ontoportal_json( - msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json" + msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json" ) -> None: """Write a mapping set dataframe to the file 
as the ontoportal mapping JSON model.""" if serialisation == "ontoportal_json": @@ -516,12 +516,10 @@ def resolve(x): "source_name": metadata.get("mapping_set_id", ""), "source_contact_info": ",".join(metadata.get("creator_id", "")), "date": metadata.get("mapping_date", row.get("mapping_date", "")), - "process": { - "name": metadata.get("mapping_set_description", ""), - "source": resolve(row.get("mapping_justification", "")), - "comment": row.get("comment", ""), - "relation": [resolve(row["predicate_id"])], - }, + "name": metadata.get("mapping_set_description", ""), + "source": resolve(row.get("mapping_justification", "")), + "comment": row.get("comment", ""), + "relation": [resolve(row["predicate_id"])] } m_list.append(json_obj) @@ -532,7 +530,7 @@ def resolve(x): def get_writer_function( - *, output_format: Optional[str] = None, output: TextIO + *, output_format: Optional[str] = None, output: TextIO ) -> Tuple[MSDFWriter, str]: """Get appropriate writer function based on file format. @@ -563,7 +561,7 @@ def get_writer_function( def write_tables( - sssom_dict: Dict[str, MappingSetDataFrame], output_dir: Union[str, Path] + sssom_dict: Dict[str, MappingSetDataFrame], output_dir: Union[str, Path] ) -> None: """Write table from MappingSetDataFrame object. 
From 25465460c08ed4b3b97d25710e2b204fc75aa444 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 21 Mar 2023 08:51:49 +0100 Subject: [PATCH 10/11] auto lint code --- sssom/writers.py | 32 ++++++++++++++++---------------- tests/test_conversion.py | 5 +++-- tests/test_writers.py | 11 +++++++++-- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/sssom/writers.py b/sssom/writers.py index ca3cb9d4..3f8b09af 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -51,11 +51,11 @@ def write_table( - msdf: MappingSetDataFrame, - file: TextIO, - embedded_mode: bool = True, - serialisation="tsv", - sort=False, + msdf: MappingSetDataFrame, + file: TextIO, + embedded_mode: bool = True, + serialisation="tsv", + sort=False, ) -> None: """Write a mapping set dataframe to the file as a table.""" if msdf.df is None: @@ -90,9 +90,9 @@ def write_table( def write_rdf( - msdf: MappingSetDataFrame, - file: TextIO, - serialisation: Optional[str] = None, + msdf: MappingSetDataFrame, + file: TextIO, + serialisation: Optional[str] = None, ) -> None: """Write a mapping set dataframe to the file as RDF.""" if serialisation is None: @@ -112,7 +112,7 @@ def write_rdf( # todo: not sure the need for serialization param here; seems superfluous for some of these funcs def write_fhir_json( - msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir" + msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir" ) -> None: """Write a mapping set dataframe to the file as FHIR ConceptMap JSON.""" data = to_fhir_json(msdf) @@ -131,9 +131,9 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") def write_owl( - msdf: MappingSetDataFrame, - file: TextIO, - serialisation=SSSOM_DEFAULT_RDF_SERIALISATION, + msdf: MappingSetDataFrame, + file: TextIO, + serialisation=SSSOM_DEFAULT_RDF_SERIALISATION, ) -> None: """Write a mapping set dataframe to the file as OWL.""" if serialisation not in RDF_FORMATS: @@ -149,7 +149,7 @@ def write_owl( def 
write_ontoportal_json( - msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json" + msdf: MappingSetDataFrame, output: TextIO, serialisation="ontoportal_json" ) -> None: """Write a mapping set dataframe to the file as the ontoportal mapping JSON model.""" if serialisation == "ontoportal_json": @@ -519,7 +519,7 @@ def resolve(x): "name": metadata.get("mapping_set_description", ""), "source": resolve(row.get("mapping_justification", "")), "comment": row.get("comment", ""), - "relation": [resolve(row["predicate_id"])] + "relation": [resolve(row["predicate_id"])], } m_list.append(json_obj) @@ -530,7 +530,7 @@ def resolve(x): def get_writer_function( - *, output_format: Optional[str] = None, output: TextIO + *, output_format: Optional[str] = None, output: TextIO ) -> Tuple[MSDFWriter, str]: """Get appropriate writer function based on file format. @@ -561,7 +561,7 @@ def get_writer_function( def write_tables( - sssom_dict: Dict[str, MappingSetDataFrame], output_dir: Union[str, Path] + sssom_dict: Dict[str, MappingSetDataFrame], output_dir: Union[str, Path] ) -> None: """Write table from MappingSetDataFrame object. 
diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 297c8eda..1fe123c2 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -14,12 +14,13 @@ from sssom.writers import ( to_dataframe, to_json, + to_ontoportal_json, to_owl_graph, to_rdf_graph, write_json, write_owl, write_rdf, - write_table, to_ontoportal_json, + write_table, ) from .test_data import SSSOMTestCase, get_all_test_cases @@ -53,7 +54,7 @@ def test_conversion(self): self._test_to_json_dict(mdoc, test) self._test_to_json(mdoc, test) logging.info("Testing ontoportal JSON export") - self._test_to_ontoportal_json(mdoc,test) + self._test_to_ontoportal_json(mdoc, test) def _test_to_owl_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc) diff --git a/tests/test_writers.py b/tests/test_writers.py index 527b4c81..e7f3d759 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -6,7 +6,14 @@ from jsonasobj2 import JsonObj from sssom.parsers import parse_sssom_json, parse_sssom_rdf, parse_sssom_table -from sssom.writers import write_fhir_json, write_json, write_owl, write_rdf, write_table, write_ontoportal_json +from sssom.writers import ( + write_fhir_json, + write_json, + write_ontoportal_json, + write_owl, + write_rdf, + write_table, +) from tests.constants import data_dir as test_data_dir from tests.constants import test_out_dir @@ -98,4 +105,4 @@ def test_write_sssom_ontoportal_json(self): len(d), self.mapping_count, f"{path} has the wrong number of mappings.", - ) \ No newline at end of file + ) From aedcd7840d9d1b49bde7f911e5c8141fdb25e78b Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Tue, 21 Mar 2023 11:40:27 +0100 Subject: [PATCH 11/11] update the test for the new ontoportal mappings format --- tests/test_conversion.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 1fe123c2..76b69955 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ 
-85,12 +85,9 @@ def _test_to_ontoportal_json(self, mdoc, test: SSSOMTestCase): first_ob: Dict = jsonob[0] self.assertTrue("classes" in first_ob) self.assertTrue(len(first_ob.get("classes")) == 2) - self.assertTrue("process" in first_ob) - first_process_ob: Dict = first_ob.get("process") - - self.assertTrue("relation" in first_process_ob) - self.assertIsInstance(first_process_ob.get("relation"), list) - self.assertGreater(len(first_process_ob.get("relation")), 0) + self.assertTrue("relation" in first_ob) + self.assertIsInstance(first_ob.get("relation"), list) + self.assertGreater(len(first_ob.get("relation")), 0) def _test_to_rdf_graph(self, mdoc, test): msdf = to_mapping_set_dataframe(mdoc)