Skip to content

Commit

Permalink
Parse list of lists (minhnh#10)
Browse files Browse the repository at this point in the history
* handle lists of lists & expand_curie exception

- add recursive functions to iterate over lists of lists
- add a method that can optionally mute the exception raised by expand_curie
- address minhnh#8

* add unittest for collection module

- add test for parsing list of lists and URIs
- add test for assertion for container with loops
- update rdflib version requirement
- handle URIRef type in list parsing
- change variable names to better match function behaviour
  • Loading branch information
minhnh authored Nov 15, 2024
1 parent bbc0168 commit 393cf60
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"
]
dependencies = [
'rdflib',
'rdflib>=7.1.0',
'pyshacl',
'platformdirs',
]
Expand Down
4 changes: 3 additions & 1 deletion src/rdf_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: MPL-2.0
from importlib.metadata import version

RDF_UTILS_VERSION = version("rdf-utils")

__version__ = version("rdf-utils")
66 changes: 66 additions & 0 deletions src/rdf_utils/collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# SPDX-License-Identifier: MPL-2.0
from typing import Any
from rdflib import Graph, BNode, IdentifiedNode, Literal, URIRef
from rdf_utils.uri import try_expand_curie


def _load_list_re(
    graph: Graph, first_node: BNode, node_set: set[IdentifiedNode], parse_uri: bool, quiet: bool
) -> list[Any]:
    """!Recursive internal function to extract list of lists from RDF list containers.

    @param graph Graph object containing the RDF list
    @param first_node node at which the (sub)list to read starts
    @param node_set nested-list nodes currently being expanded on the path from the
           outermost list down to this call; used only for loop detection
    @param parse_uri if True, string literals are converted to URIRef
    @param quiet passed on to `try_expand_curie`
    @return list in which URIRef members are kept as is, literals are converted with
            `toPython()` (string literals become URIRef when `parse_uri` is True) and
            nested lists become nested Python lists
    @exception RuntimeError Raised when a nested list contains itself
    @exception AssertionError Raised when a member is not a URIRef, Literal or BNode
    @exception ValueError Raised when `quiet` is False and a short URI cannot be expanded
    """
    list_data = []
    for node in graph.items(list=first_node):
        # NOTE: rdf:nil is itself a URIRef, so an empty nested list is returned as
        # that URIRef rather than as `[]`.
        if isinstance(node, URIRef):
            list_data.append(node)
            continue

        if isinstance(node, Literal):
            node_val = node.toPython()
            # non-string values, or any value when URI parsing is off, are kept as is
            if not parse_uri or not isinstance(node_val, str):
                list_data.append(node_val)
                continue

            # try to expand short-form URIs,
            # if doesn't work then just return URIRef of the string
            uri = try_expand_curie(
                ns_manager=graph.namespace_manager, curie_str=node_val, quiet=quiet
            )
            list_data.append(URIRef(node_val) if uri is None else uri)
            continue

        # Explicit raise instead of an `assert` statement so that the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if not isinstance(node, BNode):
            raise AssertionError(
                f"load_collections: node '{node}' not a Literal or BNode, type: {type(node)}"
            )

        if node in node_set:
            raise RuntimeError(f"Loop detected in collection at node: {node}")

        # Only nodes on the current recursion path count as a loop. The node is
        # removed again once its sub-list has been read, so a nested list that is
        # merely referenced from several places is not reported as a loop.
        node_set.add(node)
        try:
            list_data.append(_load_list_re(graph, node, node_set, parse_uri, quiet))
        finally:
            node_set.discard(node)

    return list_data


def load_list_re(
    graph: Graph, first_node: BNode, parse_uri: bool = True, quiet: bool = True
) -> list[Any]:
    """!Extract an RDF list container, including nested lists, as (nested) Python lists.

    @param graph Graph object to extract the list(s) from
    @param first_node Node at which the outermost list starts
    @param parse_uri if True will try converting literals into URIRef
    @param quiet if True will not throw exceptions other than loop detection
    @return the list content, with nested RDF lists returned as nested Python lists
    @exception RuntimeError Raised when a loop is detected
    @exception ValueError Raised when `quiet` is `False` and short URI cannot be expanded
    """
    # each top-level call starts with its own empty set for loop bookkeeping
    return _load_list_re(
        graph=graph, first_node=first_node, node_set=set(), parse_uri=parse_uri, quiet=quiet
    )
4 changes: 2 additions & 2 deletions src/rdf_utils/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import urllib.response
from email.message import EmailMessage
from rdf_utils.uri import URL_SECORO, URL_COMP_ROB2B
from rdf_utils import RDF_UTILS_VERSION
from rdf_utils import __version__


__PKG_CACHE_ROOT = join(platformdirs.user_cache_dir(), "rdf-utils")
Expand Down Expand Up @@ -37,7 +37,7 @@ def __init__(self, url_map: dict, download: bool = True):
def open(self, fullurl, data=None, timeout=_GLOBAL_DEFAULT_TIMEOUT):
if isinstance(fullurl, str):
url_req = urllib.request.Request(fullurl)
url_req.add_header("User-Agent", f"rdf-utils/{RDF_UTILS_VERSION}")
url_req.add_header("User-Agent", f"rdf-utils/{__version__}")
elif isinstance(fullurl, urllib.request.Request):
url_req = fullurl
else:
Expand Down
28 changes: 28 additions & 0 deletions src/rdf_utils/uri.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# SPDX-License-Identifier: MPL-2.0
from typing import Optional
from rdflib import URIRef
from rdflib.namespace import NamespaceManager


URL_COMP_ROB2B = "https://comp-rob2b.github.io"
URL_SECORO = "https://secorolab.github.io"
URL_SECORO_MM = f"{URL_SECORO}/metamodels"
Expand All @@ -19,3 +24,26 @@
URI_MM_EL = f"{URL_SECORO_MM}/behaviour/event_loop#"
URL_MM_EL_JSON = f"{URL_SECORO_MM}/behaviour/event_loop.json"
URL_MM_EL_SHACL = f"{URL_SECORO_MM}/behaviour/event_loop.shacl.ttl"


def try_expand_curie(
    ns_manager: NamespaceManager, curie_str: str, quiet: bool = False
) -> Optional[URIRef]:
    """!Execute rdflib `expand_curie` with exception handling

    @param ns_manager NamespaceManager object, usually can use the one in the Graph object
    @param curie_str the short URI string to be expanded
    @param quiet if False will raise ValueError, else return None
    @return expanded URIRef, or None if expansion failed and `quiet` is True
    @exception ValueError Raised when expansion fails and `quiet` is False; the original
               exception is attached as the cause
    """
    try:
        return ns_manager.expand_curie(curie_str)
    except ValueError as e:
        if quiet:
            return None

        # chain explicitly so the traceback shows the original failure as the cause
        raise ValueError(f"failed to expand '{curie_str}': {e}") from e
68 changes: 68 additions & 0 deletions tests/test_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: MPL-2.0
import unittest
from rdflib import RDF, BNode, Graph, URIRef
from rdf_utils.collection import load_list_re
from rdf_utils.uri import URL_SECORO_M, try_expand_curie


# JSON-LD fixture used by the collection tests (braces are doubled because this is
# an f-string). `test-cont` is declared as an `@list` container whose members are
# typed `@id`; `test:cont-node` carries a nested list through it:
#   [["test:node1", "test:node2"], "test-node3"]
# NOTE(review): the last member is spelled "test-node3" (hyphen), unlike the
# `test:node3` node declared in the graph — possibly intentional, confirm.
CORRECT_LIST_MODEL = f"""
{{
"@context": {{
"test": "{URL_SECORO_M}/tests/collection/",
"TestNode": {{ "@id": "test:TestNode" }},
"test-cont": {{ "@id": "test:has-container", "@container": "@list", "@type": "@id" }}
}},
"@graph": [
{{ "@id": "test:node1", "@type": "test:TestNode" }},
{{ "@id": "test:node2", "@type": "test:TestNode" }},
{{ "@id": "test:node3", "@type": "test:TestNode" }},
{{
"@id": "test:cont-node", "@type": "test:TestNode",
"test-cont": [
["test:node1", "test:node2"],
"test-node3"
]
}}
]
}}
"""


class CollectionTest(unittest.TestCase):
    """Tests for `rdf_utils.collection.load_list_re`."""

    def test_load_list_re(self):
        """A nested JSON-LD list is returned as a nested Python list with URIRef members."""
        correct_g = Graph()
        correct_g.parse(data=CORRECT_LIST_MODEL, format="json-ld")

        # resolve the subject and predicate that lead to the head of the list
        cont_node_uri = try_expand_curie(
            ns_manager=correct_g.namespace_manager, curie_str="test:cont-node", quiet=False
        )
        assert cont_node_uri is not None
        cont_pred_uri = try_expand_curie(
            ns_manager=correct_g.namespace_manager, curie_str="test:has-container", quiet=False
        )
        assert cont_pred_uri is not None

        cont_bnode = correct_g.value(subject=cont_node_uri, predicate=cont_pred_uri)
        assert isinstance(cont_bnode, BNode)
        cont_list = load_list_re(
            graph=correct_g, first_node=cont_bnode, parse_uri=True, quiet=False
        )
        # assertEqual reports the actual length on failure, unlike assertTrue(... == 2)
        self.assertEqual(len(cont_list[0]), 2)
        self.assertIsInstance(cont_list[1], URIRef)

    def test_loop_exception(self):
        """Two single-element lists that contain each other must raise RuntimeError."""
        loop_g = Graph()
        b1 = BNode()
        b2 = BNode()
        loop_g.add((b1, RDF.first, b2))
        loop_g.add((b1, RDF.rest, RDF.nil))
        loop_g.add((b2, RDF.first, b1))
        loop_g.add((b2, RDF.rest, RDF.nil))
        # test load_list_re: graph with loop should raise exception
        with self.assertRaises(RuntimeError):
            load_list_re(graph=loop_g, first_node=b1)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion tests/test_event_loop_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)


URI_TEST_EL = f"{URL_SECORO_M}/models/tests/el"
URI_TEST_EL = f"{URL_SECORO_M}/tests/el"
URI_TEST_LOOP = f"{URI_TEST_EL}/test-loop"
URIREF_TEST_LOOP = URIRef(URI_TEST_LOOP)

Expand Down

0 comments on commit 393cf60

Please sign in to comment.