diff --git a/docs/README.md b/docs/README.md index e69de29..effe4ba 100644 --- a/docs/README.md +++ b/docs/README.md @@ -0,0 +1,4 @@ +# RDF Utilities + +Tools for managing RDF resources and common models. +See [API documentation](reference/rdf_utils/) for more details. diff --git a/mkdocs.yml b/mkdocs.yml index ae8ee7c..7109a9c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -50,17 +50,15 @@ plugins: default_handler: python handlers: python: - # cross-ref to other projects import: - https://docs.python.org/3/objects.inv - https://rdflib.readthedocs.io/en/stable/objects.inv paths: [src] options: - # see full list of options at https://mkdocstrings.github.io/python/usage/ docstring_options: ignore_init_summary: true - docstring_section_style: list merge_init_into_class: true + docstring_section_style: list separate_signature: true heading_level: 1 summary: true diff --git a/src/rdf_utils/caching.py b/src/rdf_utils/caching.py index da5e2d1..42a4f32 100644 --- a/src/rdf_utils/caching.py +++ b/src/rdf_utils/caching.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MPL-2.0 +"""Utilities for caching file contents""" from socket import _GLOBAL_DEFAULT_TIMEOUT import urllib.request @@ -10,8 +11,9 @@ def read_file_and_cache(filepath: str) -> str: """Read and cache string contents of files for quick access and reducing IO operations. - May need "forgetting" mechanism if too many large files are stored. Should be fine - for loading JSON metamodels and SHACL constraints in Turtle format. + Note: + May need "forgetting" mechanism if too many large files are stored. Should be fine + for loading JSON metamodels and SHACL constraints in Turtle format. 
""" if filepath in __FILE_LOADER_CACHE: return __FILE_LOADER_CACHE[filepath] @@ -26,11 +28,14 @@ def read_file_and_cache(filepath: str) -> str: return file_content -def read_url_and_cache(url: str, timeout=_GLOBAL_DEFAULT_TIMEOUT) -> str: +def read_url_and_cache(url: str, timeout: float = _GLOBAL_DEFAULT_TIMEOUT) -> str: """Read and cache text responses from URL - `timeout` specifies duration in seconds to wait for response. Only works for HTTP, HTTPS & FTP. - By default `socket._GLOBAL_DEFAULT_TIMEOUT` will be used, which usually means no timeout. + Parameters: + url: URL to be opened with urllib + timeout: duration in seconds to wait for response. Only works for HTTP, HTTPS & FTP. + Default: `socket._GLOBAL_DEFAULT_TIMEOUT` will be used, + which usually means no timeout. """ if url in __URL_CONTENT_CACHE: return __URL_CONTENT_CACHE[url] diff --git a/src/rdf_utils/collection.py b/src/rdf_utils/collection.py index 518ec18..334dc2f 100644 --- a/src/rdf_utils/collection.py +++ b/src/rdf_utils/collection.py @@ -52,14 +52,17 @@ def _load_list_re( def load_list_re( graph: Graph, first_node: BNode, parse_uri: bool = True, quiet: bool = True ) -> list[Any]: - """!Recursively iterate over RDF list containers for extracting lists of lists. + """Recursively iterate over RDF list containers for extracting lists of lists. 
- @param graph Graph object to extract the list(s) from - @param first_node First element in the list - @param parse_uri if True will try converting literals into URIRef - @param quiet if True will not throw exceptions other than loop detection - @exception RuntimeError Raised when a loop is detected - @exception ValueError Raised when `quiet` is `False` and short URI cannot be expanded + Parameters: + graph: Graph object to extract the list(s) from + first_node: First element in the list + parse_uri: if True will try converting literals into URIRef + quiet: if True will not throw exceptions other than loop detection + + Raises: + RuntimeError: When a loop is detected + ValueError: When `quiet` is `False` and short URI cannot be expanded """ node_set = set() diff --git a/src/rdf_utils/constraints.py b/src/rdf_utils/constraints.py index a90e67f..4733b9e 100644 --- a/src/rdf_utils/constraints.py +++ b/src/rdf_utils/constraints.py @@ -1,24 +1,34 @@ # SPDX-License-Identifier: MPL-2.0 -from typing import Dict from rdflib import Dataset, Graph import pyshacl class ConstraintViolation(Exception): - def __init__(self, domain, message): + """Exception for domain-specific constraint violation + + Attributes: + domain: the violation's domain + """ + domain: str + + def __init__(self, domain: str, message: str): super().__init__(f"{domain} constraint violated: {message}") + self.domain = domain class SHACLViolation(ConstraintViolation): + """Specialized exception for SHACL violations""" def __init__(self, violation_str: str): super().__init__("SHACL", violation_str) -def check_shacl_constraints(graph: Graph, shacl_dict: Dict[str, str], quiet=False) -> bool: - """ - :param graph: rdflib.Graph to be checked - :param shacl_dict: mapping from SHACL path to graph format, e.g. 
URL -> "turtle" - :param quiet: if true will not throw an exception +def check_shacl_constraints(graph: Graph, shacl_dict: dict[str, str], quiet:bool = False) -> bool: + """Check a graph against a collection of SHACL constraints + + Parameters: + graph: rdflib.Graph to be checked + shacl_dict: mapping from SHACL path to graph format, e.g. URL -> "turtle" + quiet: if true will not throw an exception """ shacl_g = Dataset() for mm_url, fmt in shacl_dict.items(): diff --git a/src/rdf_utils/models/__init__.py b/src/rdf_utils/models/__init__.py index e69de29..5bc4a1e 100644 --- a/src/rdf_utils/models/__init__.py +++ b/src/rdf_utils/models/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: MPL-2.0 +"""Common processing utilities for RDF graph models""" diff --git a/src/rdf_utils/models/common.py b/src/rdf_utils/models/common.py index a48a764..79463d2 100644 --- a/src/rdf_utils/models/common.py +++ b/src/rdf_utils/models/common.py @@ -1,15 +1,17 @@ # SPDX-License-Identifier: MPL-2.0 -from typing import Any, Dict, Optional, Protocol +from typing import Any, Optional, Protocol from rdflib import URIRef, Graph, RDF def get_node_types(graph: Graph, node_id: URIRef) -> set[URIRef]: - """! - Get all types of a node in an RDF graph. + """Get all types of a node in an RDF graph. - @param graph RDF graph to look up node types from - @param node_id URIRef of target node - @return set of the node's types as URIRef's + Parameters: + graph: RDF graph to look up node types from + node_id: URIRef of target node + + Returns: + A set of the node's types as URIRef's """ types = set() for type_id in graph.objects(subject=node_id, predicate=RDF.type): @@ -19,11 +21,20 @@ def get_node_types(graph: Graph, node_id: URIRef) -> set[URIRef]: class ModelBase(object): - """All models should have an URI as ID and types""" + """Base object for RDF graph models, enforcing all models to have an URI as ID and types. 
+ + Attributes: + id: the model's ID as an URI + types: the model's types + Parameters: + node_id: URI of the model node in the graph + graph: RDF graph for loading types if `types` is not specified + types: the model's types + """ id: URIRef types: set[URIRef] - _attributes: Dict[URIRef, Any] + _attributes: dict[URIRef, Any] def __init__( self, node_id: URIRef, graph: Optional[Graph] = None, types: Optional[set[URIRef]] = None @@ -41,12 +52,15 @@ def __init__( self._attributes = {} def has_attr(self, key: URIRef) -> bool: + """Check if the model has an attribute.""" return key in self._attributes def set_attr(self, key: URIRef, val: Any) -> None: + """Set an attribute value.""" self._attributes[key] = val def get_attr(self, key: URIRef) -> Optional[Any]: + """Get an attribute value.""" if key not in self._attributes: return None @@ -54,18 +68,32 @@ def get_attr(self, key: URIRef) -> Optional[Any]: class AttrLoaderProtocol(Protocol): + """Protocol for functions that load model attributes.""" def __call__(self, graph: Graph, model: ModelBase, **kwargs: Any) -> None: ... class ModelLoader(object): + """Class for dynamically adding functions to load different model attributes.""" _loaders: list[AttrLoaderProtocol] def __init__(self) -> None: self._loaders = [] def register(self, loader: AttrLoaderProtocol) -> None: + """Add a new attribute loader function. + + Parameters: + loader: attribute loader function + """ self._loaders.append(loader) - def load_attributes(self, graph: Graph, model: ModelBase, **kwargs: Any): + def load_attributes(self, graph: Graph, model: ModelBase, **kwargs: Any) -> None: + """Load all attributes in the graph into a model with the registered loaders. 
+ + Parameters: + graph: RDF graph for loading attributes + model: Model object to load attributes into + kwargs: any keyword arguments to pass into the loader functions + """ for loader in self._loaders: loader(graph=graph, model=model, **kwargs) diff --git a/src/rdf_utils/models/distribution.py b/src/rdf_utils/models/distribution.py index 466bd6b..04aa772 100644 --- a/src/rdf_utils/models/distribution.py +++ b/src/rdf_utils/models/distribution.py @@ -37,6 +37,15 @@ def _get_float_from_literal(literal: Literal) -> float: class DistributionModel(ModelBase): + """Model object for probability distributions + + Attributes: + distrib_type: the type of distribution to be handled + + Parameters: + distrib_id: URI of the distribution in the graph + graph: RDF graph for loading attributes + """ distrib_type: URIRef def __init__(self, distrib_id: URIRef, graph: Graph) -> None: @@ -175,11 +184,14 @@ def _load_normal_distrib_attrs(self, graph: Graph) -> None: def distrib_from_sampled_quantity(quantity_id: URIRef, graph: Graph) -> DistributionModel: - """!Extract a distribution from a :SampledQuantity node through :from-distribution path. + """Extract a distribution from a :SampledQuantity node through :from-distribution path. - @param quantity_id URIRef of the :SampledQuantity node - @param graph rdflib.Graph to look for distribution nodes and attributes - @return distribution model object + Parameters: + quantity_id: URI of the :SampledQuantity node + graph: RDF graph to look for distribution nodes and attributes + + Returns: + distribution model object """ distrib_id = graph.value(subject=quantity_id, predicate=URI_DISTRIB_PRED_FROM_DISTRIB) assert isinstance( @@ -191,12 +203,16 @@ def distrib_from_sampled_quantity(quantity_id: URIRef, graph: Graph) -> Distribu def sample_from_distrib( distrib: DistributionModel, size: Optional[int | tuple[int, ...]] = None ) -> Any: - """!Sample from a distribution model based on its type. 
+ """Sample from a distribution model based on its type. + + Parameters: + distrib: distribution model + size: Size of the sample, which matches size argument in numpy.random calls. + Will be ignored for random rotations at the moment. For uniform and normal distribs, + tuple size should have last dimension matching the distrib's dimension. - @param distrib distribution model - @param size Size of the sample, which matches size argument in numpy.random calls. - Will be ignored for random rotations at the moment. For uniform and normal distribs, - tuple size should have last dimension matching the distrib's dimension. + Returns: + distribution sample with dimension matching given size """ if URI_DISTRIB_TYPE_UNIFORM_ROT in distrib.types: try: diff --git a/src/rdf_utils/models/event_loop.py b/src/rdf_utils/models/event_loop.py index d1ddf90..579f69c 100644 --- a/src/rdf_utils/models/event_loop.py +++ b/src/rdf_utils/models/event_loop.py @@ -18,6 +18,15 @@ class EventReactionModel(ModelBase): + """Model for reactions to an event. + + Attributes: + event_id: URI of the event to react to + + Parameters: + reaction_id: URI of the reaction model + graph: RDF graph to load relevant attributes + """ event_id: URIRef def __init__(self, reaction_id: URIRef, graph: Graph) -> None: @@ -31,6 +40,15 @@ def __init__(self, reaction_id: URIRef, graph: Graph) -> None: class FlagReactionModel(ModelBase): + """Model for reactions to a flag. + + Attributes: + flag_id: URI of the flag to react to + + Parameters: + reaction_id: URI of the reaction model + graph: RDF graph to load relevant attributes + """ flag_id: URIRef def __init__(self, reaction_id: URIRef, graph: Graph) -> None: @@ -44,6 +62,18 @@ def __init__(self, reaction_id: URIRef, graph: Graph) -> None: class EventLoopModel(ModelBase): + """Model of an event loop containing models of reactions to events and flags. 
+ + Attributes: + events_triggered: if true should notify that an event is triggered in the last loop + flag_values: value of flag in the last loop + event_reactions: reaction models to events + flag_reactions: reaction models to flags + + Parameters: + el_id: URI of event loop + graph: graph for loading attributes + """ events_triggered: dict[URIRef, bool] flag_values: dict[URIRef, bool] event_reactions: dict[URIRef, EventReactionModel] diff --git a/src/rdf_utils/models/python.py b/src/rdf_utils/models/python.py index d83404d..f0067a1 100644 --- a/src/rdf_utils/models/python.py +++ b/src/rdf_utils/models/python.py @@ -12,17 +12,39 @@ def import_attr_from_node(graph: Graph, uri: URIRef | str) -> Any: + """Import a Python module's attribute from an RDF graph using importlib + + Parameters: + graph: RDF graph to load relevant info + uri: URI of the `ModuleAttribute` node + + Returns: + The module attribute, e.g. class or function + """ if isinstance(uri, str): uri = URIRef(uri) module_name = str(graph.value(uri, URI_PY_PRED_MODULE_NAME)) attr_name = str(graph.value(uri, URI_PY_PRED_ATTR_NAME)) + return getattr(import_module(module_name), attr_name, None) -def load_py_module_attr(graph: Graph, model: ModelBase, **kwargs: Any) -> None: +def load_py_module_attr(graph: Graph, model: ModelBase, quiet: bool = True) -> None: + """Load relevant attributes of a `ModuleAttribute` node into a model object. + + Parameters: + graph: RDF graph to load relevant info. + model: The model object. 
+ quiet: If True won't raise an exception + + Raises: + RuntimeError: if not quiet and model object does not have `ModuleAttribute` type + """ if URI_PY_TYPE_MODULE_ATTR not in model.types: - return + if quiet: + return + raise RuntimeError(f"load_py_module_attr: '{model.id}' is not a {URI_PY_TYPE_MODULE_ATTR}") module_name = graph.value(model.id, URI_PY_PRED_MODULE_NAME) assert ( @@ -38,6 +60,15 @@ def load_py_module_attr(graph: Graph, model: ModelBase, **kwargs: Any) -> None: def import_attr_from_model(model: ModelBase) -> Any: + """Import a Python module's attribute from a model object. + Assuming `load_py_module_attr` was already called on the object. + + Parameters: + model: Model object containing relevant info for a `ModuleAttribute` + + Returns: + The module attribute, e.g. class or function + """ assert ( URI_PY_TYPE_MODULE_ATTR in model.types ), f"model '{model.id}' doesn't have type '{URI_PY_TYPE_MODULE_ATTR}'" diff --git a/src/rdf_utils/namespace.py b/src/rdf_utils/namespace.py index 250ecee..ca4791d 100644 --- a/src/rdf_utils/namespace.py +++ b/src/rdf_utils/namespace.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MPL-2.0 +"""Defining namespaces used by RDF models.""" from rdflib import Namespace from rdf_utils.uri import ( URI_MM_AGN, diff --git a/src/rdf_utils/naming.py b/src/rdf_utils/naming.py index cd478b8..4d89bcb 100644 --- a/src/rdf_utils/naming.py +++ b/src/rdf_utils/naming.py @@ -15,8 +15,14 @@ def get_valid_name(name: str, replacement_dict: dict) -> str: Remove leading and trailing spaces; convert other spaces to underscores; and remove anything that is not an alphanumeric, dash, underscore, or dot. 
- >>> get_valid_filename("john's portrait in 2004.jpg") - 'johns_portrait_in_2004.jpg' + + Parameters: + name: String name to be converted + replacement_dict: Maps special characters to acceptable replacements + + Examples: + >>> get_valid_filename("john's portrait in 2004.jpg") + 'johns_portrait_in_2004.jpg' """ s = str(name).strip() for char in replacement_dict: @@ -31,8 +37,10 @@ def get_valid_name(name: str, replacement_dict: dict) -> str: def get_valid_filename(name: str) -> str: + """Convert strings to valid file names. Calls `get_valid_name`""" return get_valid_name(name, __FILENAME_REPLACEMENTS) def get_valid_var_name(name: str) -> str: + """Convert strings to valid variable names. Calls `get_valid_name`""" return get_valid_name(name, __VAR_NAME_REPLACEMENTS) diff --git a/src/rdf_utils/resolver.py b/src/rdf_utils/resolver.py index 5dce999..1f50cca 100644 --- a/src/rdf_utils/resolver.py +++ b/src/rdf_utils/resolver.py @@ -18,16 +18,17 @@ class IriToFileResolver(urllib.request.OpenerDirector): """ A `urllib.request.OpenerDirector` that remaps specific URLs to local files. + + Parameters: + url_map: Mapping from a prefix of a URL to a local location. + For example, `{ "http://example.org/": "foo/bar/" }` would remap any + urllib open request for any resource under `http://example.org/` + to a local directory `foo/bar/`. + download: If true and the mapped local file does not exist, will attempt to download + to the mapped location. """ def __init__(self, url_map: dict, download: bool = True): - """ - A key-value pair in `url_map` specifies a prefix of a URL to a local location. - For example, `{ "http://example.org/": "foo/bar/" }` would remap any urllib open request - for any resource under "http://example.org/" to a local directory "foo/bar/". - If the local file does not exist and `download` is True, attempt to download the file - to the corresponding local location. 
- """ super().__init__() self.default_opener = urllib.request.build_opener() self.url_map = url_map @@ -88,13 +89,20 @@ def install_resolver( resolver: Optional[urllib.request.OpenerDirector] = None, url_map: Optional[dict] = None, download: bool = True, -): - """ - Note that only a single opener can be globally installed in urllib. - Only the latest installed resolver will be active. - If no `resolver` is specified, the default behaviour using `IriToFileResolver` is to - download the requested files to the user cache directory using `platformdirs`. - For Linux this should be `$HOME/.cache/rdf-utils/`. +) -> None: + """Implements default behaviours for resolver installation + + Parameters: + resolver: Resolver to install. If none specified, the default behaviour + (using `IriToFileResolver`) is to download the requested files to the + user cache directory using `platformdirs`. + For Linux this should be `$HOME/.cache/rdf-utils/`. + url_map: URL to local path mapping to pass to `IriToFileResolver` + download: Download file if true + + Note: + Only a single opener can be globally installed in urllib. + Only the latest installed resolver will be active. 
""" if resolver is None: if url_map is None: diff --git a/src/rdf_utils/uri.py b/src/rdf_utils/uri.py index b35c6ec..3275e84 100644 --- a/src/rdf_utils/uri.py +++ b/src/rdf_utils/uri.py @@ -33,13 +33,18 @@ def try_expand_curie( ns_manager: NamespaceManager, curie_str: str, quiet: bool = False ) -> Optional[URIRef]: - """!Execute rdflib `expand_curie` with exception handling + """Execute rdflib `expand_curie` with exception handling - @param ns_manager NamespaceManager object, usually can use the one in the Graph object - @param curie_str the short URI string to be expanded - @param quiet if False will raise ValueError, else return None - @return expanded URIRef or None - @exception ValueError + Parameters: + ns_manager: NamespaceManager object, usually can use the one in the Graph object + curie_str: The short URI string to be expanded + quiet: If False will raise ValueError, else return None + + Returns: + Expanded URIRef or None + + Raises: + ValueError: When not `quiet` and URI cannot be expanded using the given `ns_manager` """ try: uri = ns_manager.expand_curie(curie_str)