diff --git a/pyproject.toml b/pyproject.toml index c00956e..16446d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)" ] dependencies = [ - 'rdflib', + 'rdflib>=7.1.0', 'pyshacl', 'platformdirs', ] diff --git a/src/rdf_utils/collection.py b/src/rdf_utils/collection.py index 78b1243..518ec18 100644 --- a/src/rdf_utils/collection.py +++ b/src/rdf_utils/collection.py @@ -5,49 +5,57 @@ def _load_list_re( - graph: Graph, col_head: BNode, node_set: set[IdentifiedNode], parse_uri: bool, quiet: bool + graph: Graph, first_node: BNode, node_set: set[IdentifiedNode], parse_uri: bool, quiet: bool ) -> list[Any]: """Recursive internal function to extract list of lists from RDF list containers.""" - col_data = [] - for col_node in graph.items(list=col_head): - if isinstance(col_node, Literal): - node_str = col_node.toPython() + list_data = [] + for node in graph.items(list=first_node): + if isinstance(node, URIRef): + list_data.append(node) + continue + + if isinstance(node, Literal): + node_val = node.toPython() + if not isinstance(node_val, str): + list_data.append(node_val) + continue + if not parse_uri: - col_data.append(node_str) + list_data.append(node_val) continue # try to expand short-form URIs, # if doesn't work then just return URIRef of the string uri = try_expand_curie( - ns_manager=graph.namespace_manager, curie_str=node_str, quiet=quiet + ns_manager=graph.namespace_manager, curie_str=node_val, quiet=quiet ) if uri is None: - uri = URIRef(node_str) + uri = URIRef(node_val) - col_data.append(uri) + list_data.append(uri) continue assert isinstance( - col_node, BNode - ), f"load_collections: node '{col_node}' not a Literal or BNode, type: {type(col_node)}" + node, BNode + ), f"load_collections: node '{node}' not a Literal or BNode, type: {type(node)}" - if col_node in node_set: - raise RuntimeError(f"Loop detected in collection at node: {col_node}") - node_set.add(col_node) + if node in node_set: + raise RuntimeError(f"Loop detected in collection at node: {node}") + node_set.add(node) # recursive call - col_data.append(_load_list_re(graph, col_node, node_set, parse_uri, quiet)) + list_data.append(_load_list_re(graph, node, node_set, parse_uri, quiet)) - return col_data + return list_data def load_list_re( - graph: Graph, col_head: BNode, parse_uri: bool = True, quiet: bool = True + graph: Graph, first_node: BNode, parse_uri: bool = True, quiet: bool = True ) -> list[Any]: """!Recursively iterate over RDF list containers for extracting lists of lists. @param graph Graph object to extract the list(s) from - @param col_head First element in the list + @param first_node First element in the list @param parse_uri if True will try converting literals into URIRef @param quiet if True will not throw exceptions other than loop detection @exception RuntimeError Raised when a loop is detected @@ -55,4 +63,4 @@ def load_list_re( """ node_set = set() - return _load_list_re(graph, col_head, node_set, parse_uri, quiet) + return _load_list_re(graph, first_node, node_set, parse_uri, quiet) diff --git a/tests/test_collection.py b/tests/test_collection.py new file mode 100644 index 0000000..c5c7a84 --- /dev/null +++ b/tests/test_collection.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: MPL-2.0 +import unittest +from rdflib import RDF, BNode, Graph, URIRef +from rdf_utils.collection import load_list_re +from rdf_utils.uri import URL_SECORO_M, try_expand_curie + + +CORRECT_LIST_MODEL = f""" +{{ + "@context": {{ + "test": "{URL_SECORO_M}/tests/collection/", + "TestNode": {{ "@id": "test:TestNode" }}, + "test-cont": {{ "@id": "test:has-container", "@container": "@list", "@type": "@id" }} + }}, + "@graph": [ + {{ "@id": "test:node1", "@type": "test:TestNode" }}, + {{ "@id": "test:node2", "@type": "test:TestNode" }}, + {{ "@id": "test:node3", "@type": "test:TestNode" }}, + {{ + "@id": "test:cont-node", "@type": "test:TestNode", + "test-cont": [ + ["test:node1", "test:node2"], + "test-node3" + ] + }} + ] +}} +""" + + +class CollectionTest(unittest.TestCase): + def test_load_list_re(self): + correct_g = Graph() + correct_g.parse(data=CORRECT_LIST_MODEL, format="json-ld") + + cont_node_uri = try_expand_curie( + ns_manager=correct_g.namespace_manager, curie_str="test:cont-node", quiet=False + ) + assert cont_node_uri is not None + cont_pred_uri = try_expand_curie( + ns_manager=correct_g.namespace_manager, curie_str="test:has-container", quiet=False + ) + assert cont_pred_uri is not None + + cont_bnode = correct_g.value(subject=cont_node_uri, predicate=cont_pred_uri) + assert isinstance(cont_bnode, BNode) + cont_list = load_list_re( + graph=correct_g, first_node=cont_bnode, parse_uri=True, quiet=False + ) + self.assertTrue(len(cont_list[0]) == 2) + self.assertIsInstance(cont_list[1], URIRef) + + def test_loop_exception(self): + loop_g = Graph() + b1 = BNode() + b2 = BNode() + loop_g.add((b1, RDF.first, b2)) + loop_g.add((b1, RDF.rest, RDF.nil)) + loop_g.add((b2, RDF.first, b1)) + loop_g.add((b2, RDF.rest, RDF.nil)) + with self.assertRaises( + RuntimeError, msg="test load_list_re: graph with loop should raise exception" + ): + _ = load_list_re(graph=loop_g, first_node=b1) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_event_loop_model.py b/tests/test_event_loop_model.py index 8992f54..4fd47a9 100644 --- a/tests/test_event_loop_model.py +++ b/tests/test_event_loop_model.py @@ -14,7 +14,7 @@ ) -URI_TEST_EL = f"{URL_SECORO_M}/models/tests/el" +URI_TEST_EL = f"{URL_SECORO_M}/tests/el" URI_TEST_LOOP = f"{URI_TEST_EL}/test-loop" URIREF_TEST_LOOP = URIRef(URI_TEST_LOOP)