Allow multiple ontology classes per annotation (str or list of str).

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line structure was validated against every hunk header count, but the
leading whitespace of context lines is reconstructed -- confirm against the
target files (or apply with `git apply --ignore-whitespace`).
NOTE(review): two added lines differ from the recovered text, so the blob
hashes on the `index` lines no longer describe the post-image:
`uri.split(":", 1)` in `get_uri` (tolerates ':' inside the CURIE reference)
and the "FunctionExecution is ..." comment in the new provenance test.

diff --git a/alpaca/ontology/annotation.py b/alpaca/ontology/annotation.py
index 1906b9e..0bd2228 100644
--- a/alpaca/ontology/annotation.py
+++ b/alpaca/ontology/annotation.py
@@ -22,10 +22,13 @@
 `__ontology__` dictionary will define the main URI describing either the
 function or the data object:
 
-* 'function' : str
-    A URI to the ontology class representing the Python function.
-* 'data_object' : str
-    A URI to the ontology class representing the Python data object.
+* 'function' : str or list of str
+    A URI to the ontology class representing the Python function. Multiple URIs
+    can be passed as a list, if the function is represented by multiple classes.
+* 'data_object' : str or list of str
+    A URI to the ontology class representing the Python data object. Multiple
+    URIs can be passed as a list, if the object is represented by multiple
+    classes.
 
 Additional annotations can be stored depending on whether a function or data
 object is being annotated.
@@ -35,11 +38,12 @@
 
 * 'arguments' : dict
     A dictionary where the keys are argument names (cf. the function
-    declaration in the `def` statement) and the values are the URI
-    to the ontology class representing the argument.
+    declaration in the `def` statement) and the values are the URI(s)
+    to the ontology class(es) representing the argument.
 * 'returns' : dict
     A dictionary where the keys are function outputs, and the values define the
-    URIs to the ontology classes that represent each output identified by a key.
+    URI(s) to the ontology class(es) representing each output identified by a
+    key.
     The keys in the `returns` dictionary can have three possible values:
       1. a string with one output name (if this is the name of an argument, cf.
          the function declaration in the `def` statement), which assumes that a
@@ -63,11 +67,11 @@
 
 * 'attributes' : dict
     A dictionary where the keys are object attribute names and the values are
-    the URI to the ontology class representing the attribute.
+    the URI(s) to the ontology class(es) representing the attribute.
 * 'annotations' : dict
     A dictionary where the keys are annotation names and the values are the
-    URI to the ontology class representing the annotation. Annotations are
-    key-pair values specified in dictionaries stored as one attribute of the
+    URI(s) to the ontology class(es) representing the annotation. Annotations
+    are key-pair values specified in dictionaries stored as one attribute of the
     object (e.g., `obj.annotations`).
 
 Finally, the ontology annotations can be defined using namespaces so that the
@@ -230,8 +234,8 @@ def get_container_returns(self):
 
     def get_uri(self, information_type, element=None):
         if information_type in VALID_OBJECTS:
-            # Information on 'function' and 'data_object' are strings, stored
-            # directly as attributes
+            # Information on 'function' and 'data_object' are strings or
+            # lists, stored directly as attributes
             information_value = getattr(self, information_type)
         else:
             # Specific information of 'function' and 'data_object' are
@@ -249,13 +253,26 @@ def get_uri(self, information_type, element=None):
         if not information_value:
             return None
 
-        if (information_value[0], information_value[-1]) == ("<", ">"):
-            # This is a URI
-            return rdflib.URIRef(information_value[1:-1])
-
-        # If not full URIs, information must be CURIEs. Get the URIRef.
-        prefix, value = information_value.split(":")
-        return self.namespaces[prefix][value]
+        if not isinstance(information_value, list):
+            information_value = [information_value]
+
+        # Process URI(s) to get `rdflib.URIRef` elements, resolving any
+        # namespace.
+        uris = []
+        for uri in information_value:
+            if (uri[0], uri[-1]) == ("<", ">"):
+                # This is a full URI
+                uris.append(rdflib.URIRef(uri[1:-1]))
+            else:
+                # If not full URIs, information must be CURIEs.
+                # Get the `URIRef` from the namespace.
+                prefix, value = uri.split(":", 1)
+                uris.append(self.namespaces[prefix][value])
+
+        if len(uris) == 1:
+            # Return annotation with a single URI directly
+            return uris[0]
+        return uris
 
     def __repr__(self):
         repr_str = "OntologyInformation("
diff --git a/alpaca/serialization/prov.py b/alpaca/serialization/prov.py
index 702d800..4646e89 100644
--- a/alpaca/serialization/prov.py
+++ b/alpaca/serialization/prov.py
@@ -139,11 +139,15 @@ def _add_Function(self, function_info):
                         Literal(function_info.version)))
         return uri
 
-    def _add_ontology_information(self, uri, ontology_info, information_type,
-                                  element=None):
-        class_iri = ontology_info.get_uri(information_type, element)
-        if class_iri:
-            self.graph.add((uri, RDF.type, class_iri))
+    def _add_ontology_information(self, target_uri, ontology_info,
+                                  information_type, element=None):
+        class_info = ontology_info.get_uri(information_type, element)
+        if class_info:
+            if isinstance(class_info, list):
+                for class_uri in class_info:
+                    self.graph.add((target_uri, RDF.type, class_uri))
+            else:
+                self.graph.add((target_uri, RDF.type, class_info))
 
     def _add_FunctionExecution(self, script_info, session_id, execution_id,
                                function_info, params, execution_order,
diff --git a/alpaca/test/test_ontology_annotation.py b/alpaca/test/test_ontology_annotation.py
index 6b01ba1..0750e48 100644
--- a/alpaca/test/test_ontology_annotation.py
+++ b/alpaca/test/test_ontology_annotation.py
@@ -54,6 +54,18 @@ def process(input, param_1):
 }
 
 
+@Provenance(inputs=['input'])
+def process_one_and_process_two(input, param_1):
+    return OutputObject("SpikeTrain#1", 45)
+
+process_one_and_process_two.__wrapped__.__ontology__ = {
+    "function": ["ontology:Process1Function", "ontology:Process2Function"],
+    "namespaces": EXAMPLE_NS,
+    "arguments": {'param_1': "ontology:Parameter"},
+    "returns": {0: "ontology:ProcessedData"}
+}
+
+
 @Provenance(inputs=['input'])
 def process_multiple(input, param_1):
     return "not_annotated", OutputObject("SpikeTrain#2", 34)
@@ -259,6 +271,29 @@ def test_annotation_function(self):
             "returns={0: 'ontology:ProcessedData'})"
         )
 
+    def test_annotation_function_multiple_annotations(self):
+        self.assertIsNotNone(
+            _OntologyInformation.get_ontology_information(
+                process_one_and_process_two))
+        info = _OntologyInformation(process_one_and_process_two)
+        self.assertListEqual(
+            info.get_uri("function"),
+            [URIRef("http://example.org/ontology#Process1Function"),
+             URIRef("http://example.org/ontology#Process2Function")])
+        self.assertEqual(
+            info.get_uri("arguments", "param_1"),
+            URIRef("http://example.org/ontology#Parameter"))
+        self.assertEqual(
+            info.get_uri("returns", 0),
+            URIRef("http://example.org/ontology#ProcessedData"))
+        self.assertEqual(
+            str(info),
+            "OntologyInformation(function='['ontology:Process1Function', "
+            "'ontology:Process2Function']', "
+            "arguments={'param_1': 'ontology:Parameter'}, "
+            f"namespaces={{'ontology': {repr(self.ONTOLOGY)}}}, "
+            "returns={0: 'ontology:ProcessedData'})")
+
     def test_annotation_function_multiple(self):
         self.assertIsNotNone(
             _OntologyInformation.get_ontology_information(process_multiple))
@@ -409,6 +444,98 @@ def test_provenance_annotation(self):
         self.assertTrue((output_node,
                          PROV.wasDerivedFrom, input_node) in prov_graph)
 
+    def test_provenance_multiple_annotations(self):
+        activate(clear=True)
+        input_object = InputObject()
+        output_object = process_one_and_process_two(input_object, 34)
+        deactivate()
+
+        prov_data = save_provenance()
+
+        # Read PROV information as RDF
+        prov_graph = Graph()
+        with io.StringIO(prov_data) as data_stream:
+            prov_graph.parse(data_stream, format='turtle')
+
+        # Check that the annotations exist (1 per class is expected)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.Parameter)))
+            ), 1)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.Process1Function)))
+            ), 1)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.Process2Function)))
+            ), 1)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.ProcessedData)))
+            ), 1)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.InputObject)))
+            ), 1)
+        self.assertEqual(
+            len(list(prov_graph.triples(
+                (None, RDF.type, self.ONTOLOGY.OutputObject)))
+            ), 1)
+
+        # FunctionExecution is both Process1Function and Process2Function
+        execution_uri = list(
+            prov_graph.subjects(RDF.type, ALPACA.FunctionExecution))[0]
+        self.assertTrue((execution_uri,
+                         RDF.type,
+                         self.ONTOLOGY.Process1Function) in prov_graph)
+
+        self.assertTrue((execution_uri,
+                         RDF.type,
+                         self.ONTOLOGY.Process2Function) in prov_graph)
+
+        # Check parameter name
+        parameter_node = list(
+            prov_graph.subjects(RDF.type, self.ONTOLOGY.Parameter))[0]
+        self.assertTrue((parameter_node,
+                         ALPACA.pairName, Literal("param_1")) in prov_graph)
+        self.assertTrue((parameter_node,
+                         ALPACA.pairValue, Literal(34)) in prov_graph)
+
+        # Check returned value
+        output_node = list(
+            prov_graph.subjects(RDF.type, self.ONTOLOGY.ProcessedData))[0]
+        self.assertTrue((output_node,
+                         PROV.wasGeneratedBy, execution_uri) in prov_graph)
+        self.assertTrue((output_node,
+                         RDF.type, ALPACA.DataObjectEntity) in prov_graph)
+        self.assertTrue((output_node,
+                         RDF.type, self.ONTOLOGY.OutputObject) in prov_graph)
+
+        # Check attributes of returned value
+        expected_attributes = {
+            'name': "SpikeTrain#1",
+            'channel': 45,
+        }
+        for attribute in prov_graph.objects(output_node, ALPACA.hasAttribute):
+            name = prov_graph.value(attribute, ALPACA.pairName).toPython()
+            value = prov_graph.value(attribute, ALPACA.pairValue).toPython()
+            self.assertEqual(value, expected_attributes[name])
+
+            # Check if attribute annotation is present for `name`
+            if name == 'name':
+                self.assertTrue((attribute, RDF.type, self.ONTOLOGY.Attribute)
+                                in prov_graph)
+
+        # Check input value
+        input_node = list(
+            prov_graph.subjects(RDF.type, self.ONTOLOGY.InputObject))[0]
+        self.assertTrue((execution_uri, PROV.used, input_node) in prov_graph)
+        self.assertTrue((input_node,
+                         RDF.type, ALPACA.DataObjectEntity) in prov_graph)
+        self.assertTrue((output_node,
+                         PROV.wasDerivedFrom, input_node) in prov_graph)
+
     def test_provenance_annotation_multiple_returns(self):
         activate(clear=True)
         input_object = InputObject()