Skip to content

Commit

Permalink
add unittest for collection module
Browse files Browse the repository at this point in the history
- add test for parsing list of lists and URIs
- add test for the exception raised when a container has loops
- update rdflib version requirement
- handle URIRef type in list parsing
- change variable names to better match function behaviour
  • Loading branch information
minhnh committed Nov 14, 2024
1 parent 9dcf4b1 commit 5194000
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"
]
dependencies = [
'rdflib',
'rdflib>=7.1.0',
'pyshacl',
'platformdirs',
]
Expand Down
46 changes: 27 additions & 19 deletions src/rdf_utils/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,62 @@


def _load_list_re(
graph: Graph, col_head: BNode, node_set: set[IdentifiedNode], parse_uri: bool, quiet: bool
graph: Graph, first_node: BNode, node_set: set[IdentifiedNode], parse_uri: bool, quiet: bool
) -> list[Any]:
"""Recursive internal function to extract list of lists from RDF list containers."""
col_data = []
for col_node in graph.items(list=col_head):
if isinstance(col_node, Literal):
node_str = col_node.toPython()
list_data = []
for node in graph.items(list=first_node):
if isinstance(node, URIRef):
list_data.append(node)
continue

if isinstance(node, Literal):
node_val = node.toPython()
if not isinstance(node_val, str):
list_data.append(node_val)
continue

if not parse_uri:
col_data.append(node_str)
list_data.append(node_val)
continue

# try to expand short-form URIs,
# if doesn't work then just return URIRef of the string
uri = try_expand_curie(
ns_manager=graph.namespace_manager, curie_str=node_str, quiet=quiet
ns_manager=graph.namespace_manager, curie_str=node_val, quiet=quiet
)
if uri is None:
uri = URIRef(node_str)
uri = URIRef(node_val)

col_data.append(uri)
list_data.append(uri)
continue

assert isinstance(
col_node, BNode
), f"load_collections: node '{col_node}' not a Literal or BNode, type: {type(col_node)}"
node, BNode
), f"load_collections: node '{node}' not a Literal or BNode, type: {type(node)}"

if col_node in node_set:
raise RuntimeError(f"Loop detected in collection at node: {col_node}")
node_set.add(col_node)
if node in node_set:
raise RuntimeError(f"Loop detected in collection at node: {node}")
node_set.add(node)

# recursive call
col_data.append(_load_list_re(graph, col_node, node_set, parse_uri, quiet))
list_data.append(_load_list_re(graph, node, node_set, parse_uri, quiet))

return col_data
return list_data


def load_list_re(
graph: Graph, col_head: BNode, parse_uri: bool = True, quiet: bool = True
graph: Graph, first_node: BNode, parse_uri: bool = True, quiet: bool = True
) -> list[Any]:
"""!Recursively iterate over RDF list containers for extracting lists of lists.
@param graph Graph object to extract the list(s) from
@param col_head First element in the list
@param first_node First element in the list
@param parse_uri if True will try converting literals into URIRef
@param quiet if True will not throw exceptions other than loop detection
@exception RuntimeError Raised when a loop is detected
@exception ValueError Raised when `quiet` is `False` and short URI cannot be expanded
"""
node_set = set()

return _load_list_re(graph, col_head, node_set, parse_uri, quiet)
return _load_list_re(graph, first_node, node_set, parse_uri, quiet)
68 changes: 68 additions & 0 deletions tests/test_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: MPL-2.0
import unittest
from rdflib import RDF, BNode, Graph, URIRef
from rdf_utils.collection import load_list_re
from rdf_utils.uri import URL_SECORO_M, try_expand_curie


# JSON-LD fixture describing a container node whose "test-cont" property is an RDF list
# holding a nested two-element list followed by a single node reference.
# "test-cont" is declared with "@type": "@id", so its string values are read as IRIs;
# every entry therefore uses the "test:" prefix so that it refers to one of the nodes
# declared in "@graph" (a hyphenated "test-node3" would be a relative IRI instead).
CORRECT_LIST_MODEL = f"""
{{
    "@context": {{
        "test": "{URL_SECORO_M}/tests/collection/",
        "TestNode": {{ "@id": "test:TestNode" }},
        "test-cont": {{ "@id": "test:has-container", "@container": "@list", "@type": "@id" }}
    }},
    "@graph": [
        {{ "@id": "test:node1", "@type": "test:TestNode" }},
        {{ "@id": "test:node2", "@type": "test:TestNode" }},
        {{ "@id": "test:node3", "@type": "test:TestNode" }},
        {{
            "@id": "test:cont-node", "@type": "test:TestNode",
            "test-cont": [
                ["test:node1", "test:node2"],
                "test:node3"
            ]
        }}
    ]
}}
"""


class CollectionTest(unittest.TestCase):
    """Unit tests for `load_list_re` from `rdf_utils.collection`."""

    def test_load_list_re(self):
        """Load the nested list from CORRECT_LIST_MODEL and check its structure."""
        correct_g = Graph()
        correct_g.parse(data=CORRECT_LIST_MODEL, format="json-ld")

        # Resolve the subject and predicate that point at the head of the list.
        # The bare asserts also narrow the optional results for static type checkers.
        cont_node_uri = try_expand_curie(
            ns_manager=correct_g.namespace_manager, curie_str="test:cont-node", quiet=False
        )
        assert cont_node_uri is not None
        cont_pred_uri = try_expand_curie(
            ns_manager=correct_g.namespace_manager, curie_str="test:has-container", quiet=False
        )
        assert cont_pred_uri is not None

        cont_bnode = correct_g.value(subject=cont_node_uri, predicate=cont_pred_uri)
        assert isinstance(cont_bnode, BNode)
        cont_list = load_list_re(
            graph=correct_g, first_node=cont_bnode, parse_uri=True, quiet=False
        )

        # First entry is the nested list with two members, second entry is a plain URI.
        # assertEqual reports the actual length on failure, unlike assertTrue(... == 2).
        self.assertEqual(len(cont_list[0]), 2)
        self.assertIsInstance(cont_list[1], URIRef)

    def test_loop_exception(self):
        """A list whose elements reference each other must raise RuntimeError."""
        loop_g = Graph()
        b1 = BNode()
        b2 = BNode()
        # b1's only element is b2, and b2's only element is b1: a two-node cycle.
        loop_g.add((b1, RDF.first, b2))
        loop_g.add((b1, RDF.rest, RDF.nil))
        loop_g.add((b2, RDF.first, b1))
        loop_g.add((b2, RDF.rest, RDF.nil))
        with self.assertRaises(
            RuntimeError, msg="test load_list_re: graph with loop should raise exception"
        ):
            load_list_re(graph=loop_g, first_node=b1)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion tests/test_event_loop_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)


URI_TEST_EL = f"{URL_SECORO_M}/models/tests/el"
URI_TEST_EL = f"{URL_SECORO_M}/tests/el"
URI_TEST_LOOP = f"{URI_TEST_EL}/test-loop"
URIREF_TEST_LOOP = URIRef(URI_TEST_LOOP)

Expand Down

0 comments on commit 5194000

Please sign in to comment.