Skip to content

Commit

Permalink
Merge pull request #90 from pyiron/add_parsers
Browse files Browse the repository at this point in the history
Add parsers
  • Loading branch information
samwaseda authored Jan 21, 2025
2 parents 14d6681 + 21112ec commit 071ac40
Show file tree
Hide file tree
Showing 7 changed files with 317 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .binder/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ dependencies:
- owlready2 =0.47
- pandas =2.2.3
- pint =0.24.4
- semantikon =0.0.9
- pyiron_workflow =0.11.2
- rdflib =7.1.1
- owlrl =7.1.2
- pyiron_atomistics =0.6.19
- pyiron-data =0.0.30
- lammps =2024.02.07=*_openmpi_*
6 changes: 5 additions & 1 deletion .ci_support/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ dependencies:
- openjdk
- owlready2 =0.47
- pandas =2.2.3
- pint =0.24.4
- pint =0.24.4
- semantikon =0.0.9
- pyiron_workflow =0.11.2
- rdflib =7.1.1
- owlrl =7.1.2
12 changes: 8 additions & 4 deletions .ci_support/lower_bound.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ dependencies:
- numpy =1.26.3
- openjdk
- owlready2 =0.46
- pandas =2.1.4
- pint =0.23
- pyiron_atomistics =0.5.0
- pyiron-data =0.0.26
- pandas =2.2.0
- pint =0.24
- pyiron_atomistics =0.6.19
- pyiron-data =0.0.26
- semantikon =0.0.9
- pyiron_workflow =0.11.0
- rdflib =7.1.1
- owlrl =7.1.2
3 changes: 2 additions & 1 deletion .github/workflows/push-pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ jobs:
uses: pyiron/actions/.github/workflows/[email protected]
secrets: inherit
with:
python-version-alt3: 'exclude' # No python 3.9
docs-env-files: .ci_support/environment.yml .ci_support/environment-docs.yml
notebooks-env-files: .ci_support/environment.yml .ci_support/environment-pyiron_atomistics.yml .ci_support/environment-lammps.yml
tests-env-files: .ci_support/environment.yml .ci_support/environment-pyiron_atomistics.yml
alternate-tests-env-files: .ci_support/lower_bound.yml
alternate-tests-env-files: .ci_support/lower_bound.yml
4 changes: 4 additions & 0 deletions docs/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@ dependencies:
- owlready2 =0.47
- pandas =2.2.3
- pint =0.24.4
- semantikon =0.0.9
- pyiron_workflow =0.11.2
- rdflib =7.1.1
- owlrl =7.1.2
- sphinxcontrib-mermaid
168 changes: 168 additions & 0 deletions pyiron_ontology/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from semantikon.converter import parse_input_args, parse_output_args
from rdflib import Graph, Literal, RDF, RDFS, URIRef, OWL
from pyiron_workflow import NOT_DATA


def get_source_output(var):
    """
    Return the identifier of the output channel that feeds a channel

    Args:
        var: channel object exposing `connected` and `connections`

    Returns:
        (str | None): "<owner label>.outputs.<channel label>" built from the
            first connection, or None if the channel is not connected
    """
    if var.connected:
        upstream = var.connections[0]
        return "{}.outputs.{}".format(upstream.owner.label, upstream.label)
    return None


def get_inputs_and_outputs(node):
    """
    Read input and output arguments with their type hints and return a
    dictionary containing all input output information

    Args:
        node (pyiron_workflow.nodes.Node): node to be parsed

    Returns:
        (dict): dictionary with the keys "inputs", "outputs", "function"
            and "label". Each entry of "inputs" and "outputs" holds the
            parsed type-hint metadata plus "value" and "var_name"; input
            entries additionally hold "connection" (see get_source_output)
    """
    inputs = parse_input_args(node.node_function)
    outputs = parse_output_args(node.node_function)
    output_labels = node.outputs.labels
    if isinstance(outputs, dict):
        outputs = (outputs,)
    elif outputs is None:
        # One independent dict per output: `n * ({},)` would repeat a single
        # shared dict, so writing "value"/"var_name" for one output would
        # overwrite the entries of all the others.
        outputs = tuple({} for _ in output_labels)
    outputs = dict(zip(output_labels, outputs))
    for key, channel in node.inputs.items():
        if inputs[key] is None:
            inputs[key] = {}
        inputs[key]["value"] = channel.value
        inputs[key]["var_name"] = key
        inputs[key]["connection"] = get_source_output(channel)
    for key, value in node.outputs.to_value_dict().items():
        # Mirror the input handling: an output without metadata still gets
        # an entry instead of failing on item assignment
        if outputs.get(key) is None:
            outputs[key] = {}
        outputs[key]["value"] = value
        outputs[key]["var_name"] = key
    return {
        "inputs": inputs,
        "outputs": outputs,
        "function": node.node_function.__name__,
        "label": node.label,
    }


def get_triples(
    data,
    EX,
    hasSourceFunction=None,
    hasUnits=None,
    inheritsPropertiesFrom=None,
    update_query=True,
    graph=None,
):
    """
    Generate an RDF graph describing a parsed node

    Args:
        data (dict): node description with the keys "label", "function",
            "inputs" and "outputs" (the layout returned by
            get_inputs_and_outputs)
        EX: namespace used to create URIs via item and attribute access
            (e.g. rdflib.Namespace)
        hasSourceFunction: predicate linking the node to its function;
            defaults to EX.hasSourceFunction
        hasUnits: predicate linking a channel to its units; defaults to
            EX.hasUnits
        inheritsPropertiesFrom: predicate linking an input to the output
            it is connected to; defaults to EX.inheritsPropertiesFrom
        update_query (bool): whether to run inherit_properties on the
            graph before returning it
        graph (rdflib.Graph): graph to add the triples to. If None, a new
            graph is created. If the graph already declares the node as
            an OWL.NamedIndividual, it is returned unchanged.

    Returns:
        (rdflib.Graph): graph containing the triples of the node
    """
    if hasSourceFunction is None:
        hasSourceFunction = EX.hasSourceFunction
    if hasUnits is None:
        hasUnits = EX.hasUnits
    if inheritsPropertiesFrom is None:
        inheritsPropertiesFrom = EX.inheritsPropertiesFrom
    if graph is None:
        graph = Graph()
    node = EX[data["label"]]
    label_def_triple = (node, RDF.type, OWL.NamedIndividual)
    # The node has already been parsed into this graph
    if label_def_triple in graph:
        return graph
    graph.add(label_def_triple)
    graph.add((node, hasSourceFunction, EX[data["function"]]))
    for io_ in ["inputs", "outputs"]:
        for key, d in data[io_].items():
            full_key = data["label"] + f".{io_}." + key
            label = EX[full_key]
            graph.add((label, RDFS.label, Literal(full_key)))
            if d.get("uri", None) is not None:
                graph.add((label, RDF.type, d["uri"]))
            # NOT_DATA marks a channel without a value; None is a valid value
            if d.get("value", NOT_DATA) is not NOT_DATA:
                graph.add((label, RDF.value, Literal(d["value"])))
            # "inputs" -> EX.inputOf, "outputs" -> EX.outputOf
            graph.add((label, EX[io_[:-1] + "Of"], node))
            if d.get("units", None) is not None:
                graph.add((label, hasUnits, EX[d["units"]]))
            if d.get("connection", None) is not None:
                graph.add((label, inheritsPropertiesFrom, EX[d["connection"]]))
            for t in _get_triples_from_restrictions(d, EX):
                graph.add(_parse_triple(t, EX, label=label, data=data))
    if update_query:
        inherit_properties(graph, EX)
    return graph


def _get_triples_from_restrictions(data, EX):
triples = []
if data.get("restriction", None) is not None:
triples = restriction_to_triple(data["restriction"])
if data.get("triple", None) is not None:
if isinstance(data["triple"][0], tuple | list):
triples.extend(list(data["triple"]))
else:
triples.extend([data["triple"]])
return triples


def restriction_to_triple(restriction):
    """
    Translate restriction definitions into raw triples

    Args:
        restriction (tuple): either one restriction, given as a tuple of
            (predicate, object) pairs, or a tuple of such restrictions

    Returns:
        (list): for each restriction, a triple declaring
            "<object of the first pair>Restriction" as OWL.Restriction, one
            triple per pair with that label as subject, and a final
            2-element entry (RDF.type, label) without a subject
    """
    assert isinstance(restriction, tuple) and isinstance(restriction[0], tuple)
    # A single restriction is wrapped so that both forms are handled alike
    if not isinstance(restriction[0][0], tuple):
        restriction = (restriction,)
    triples = []
    for pairs in restriction:
        assert len(pairs[0]) == 2
        label = pairs[0][1] + "Restriction"
        triples += [(label, RDF.type, OWL.Restriction)]
        triples += [(label, pair[0], pair[1]) for pair in pairs]
        triples += [(RDF.type, label)]
    return triples


def _parse_triple(triple, EX, label=None, data=None):
if len(triple) == 2:
subj, pred, obj = label, triple[0], triple[1]
elif len(triple) == 3:
subj, pred, obj = triple
else:
raise ValueError("Triple must have 2 or 3 elements")
if obj.startswith("inputs.") or obj.startswith("outputs."):
obj = data["label"] + "." + obj
if not isinstance(obj, URIRef):
obj = EX[obj]
return subj, pred, obj


def inherit_properties(graph, NS, n=None):
    """
    Copy properties along ns:inheritsPropertiesFrom links inside the graph

    A SPARQL update inserts, for every subject that inherits from a target,
    the triples of the target except for inheritsPropertiesFrom,
    rdfs:label, rdf:value and rdf:type. The update is executed `n` times.

    Args:
        graph (rdflib.Graph): graph to be updated in place
        NS: namespace in which inheritsPropertiesFrom is defined
        n (int): number of executions; defaults to the number of
            inheritsPropertiesFrom triples in the graph
    """
    query = "\n".join(
        [
            f"PREFIX ns: <{NS}>",
            f"PREFIX rdfs: <{RDFS}>",
            f"PREFIX rdf: <{RDF}>",
            "",
            "INSERT {",
            " ?subject ?p ?o .",
            "}",
            "WHERE {",
            " ?subject ns:inheritsPropertiesFrom ?target .",
            " ?target ?p ?o .",
            " FILTER(?p != ns:inheritsPropertiesFrom)",
            " FILTER(?p != rdfs:label)",
            " FILTER(?p != rdf:value)",
            " FILTER(?p != rdf:type)",
            "}",
        ]
    )
    if n is None:
        links = graph.triples((None, NS.inheritsPropertiesFrom, None))
        n = sum(1 for _ in links)
    for _ in range(n):
        graph.update(query)


def validate_values(graph):
    """
    List the triples required by OWL restrictions but absent from the graph

    For every OWL.Restriction with both OWL.onProperty and
    OWL.someValuesFrom, each instance of a class that is
    OWL.equivalentClass to the restriction is expected to have the triple
    (instance, onProperty value, someValuesFrom value).

    Args:
        graph (rdflib.Graph): graph to be checked

    Returns:
        (list): expected triples which are not in the graph
    """
    missing_triples = []
    for restriction in graph.subjects(RDF.type, OWL.Restriction):
        on_property = graph.value(restriction, OWL.onProperty)
        some_values_from = graph.value(restriction, OWL.someValuesFrom)
        if not (on_property and some_values_from):
            continue
        for cls in graph.subjects(OWL.equivalentClass, restriction):
            for instance in graph.subjects(RDF.type, cls):
                expected = (instance, on_property, some_values_from)
                if expected not in graph:
                    missing_triples.append(expected)
    return missing_triples
126 changes: 126 additions & 0 deletions tests/unit/test_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import unittest
from owlrl import DeductiveClosure, OWLRL_Semantics
from rdflib import Graph, OWL, RDF
from pyiron_ontology.parser import get_inputs_and_outputs, get_triples, inherit_properties, validate_values
from pyiron_workflow import Workflow
from semantikon.typing import u
from rdflib import Namespace


# Example namespace from which the tests below create URIs for predicates,
# classes and workflow channels
EX = Namespace("http://example.org/")


@Workflow.wrap.as_function_node("speed")
def calculate_speed(
    distance: u(float, units="meter") = 10.0,
    time: u(float, units="second") = 2.0,
) -> u(
    float,
    units="meter/second",
    triple=(
        (EX.isOutputOf, "inputs.time"),
        (EX.subject, EX.predicate, EX.object),
    ),
):
    """
    Fixture node with units on every channel and two extra triples on the
    output: a 2-element one referring to the input "time" and a complete
    3-element one
    """
    speed = distance / time
    return speed


@Workflow.wrap.as_function_node("result")
def add(
    a: float,
    b: float,
) -> u(float, triple=(EX.HasOperation, EX.Addition)):
    """Fixture node whose output is tagged with (EX.HasOperation, EX.Addition)"""
    result = a + b
    return result


@Workflow.wrap.as_function_node("result")
def multiply(
    a: float,
    b: float,
) -> u(
    float,
    triple=(
        (EX.HasOperation, EX.Multiplication),
        (EX.inheritsPropertiesFrom, "inputs.a"),
    ),
):
    """
    Fixture node whose output is tagged with
    (EX.HasOperation, EX.Multiplication) and declared to inherit the
    properties of its input "a"
    """
    result = a * b
    return result


@Workflow.wrap.as_function_node("result")
def correct_analysis(
    a: u(
        float,
        restriction=(
            (OWL.onProperty, EX.HasOperation),
            (OWL.someValuesFrom, EX.Addition),
        ),
    ),
) -> float:
    """
    Fixture node that passes its input through; the input carries a
    restriction on EX.HasOperation with some values from EX.Addition
    """
    return a


@Workflow.wrap.as_function_node("result")
def wrong_analysis(
    a: u(
        float,
        restriction=(
            (OWL.onProperty, EX.HasOperation),
            (OWL.someValuesFrom, EX.Division),
        ),
    ),
) -> float:
    """
    Fixture node that passes its input through; the input carries a
    restriction on EX.HasOperation with some values from EX.Division
    """
    return a


class TestParser(unittest.TestCase):
    """Tests for turning workflow nodes into RDF triples and validating them"""

    def test_parser(self):
        """The parsed node description exposes all top-level keys"""
        node = calculate_speed()
        output_dict = get_inputs_and_outputs(node)
        for key in ("inputs", "outputs", "function", "label"):
            self.assertIn(key, output_dict)

    def test_triples(self):
        """Units and user-defined triples end up in the graph"""
        graph = get_triples(get_inputs_and_outputs(calculate_speed()), EX)

        def count(pattern):
            return len(list(graph.triples(pattern)))

        self.assertGreater(count((None, EX.hasUnits, EX["meter/second"])), 0)
        self.assertEqual(
            count((None, EX.isOutputOf, EX["calculate_speed.inputs.time"])), 1
        )
        self.assertEqual(count((EX.subject, EX.predicate, EX.object)), 1)

    def test_correct_analysis(self):
        """A restriction is satisfied by `correct_analysis` only"""

        def get_graph(wf):
            graph = Graph()
            graph.add((EX.HasOperation, RDF.type, RDF.Property))
            graph.add((EX.Addition, RDF.type, OWL.Class))
            graph.add((EX.Multiplication, RDF.type, OWL.Class))
            for child in wf.children.values():
                graph += get_triples(get_inputs_and_outputs(child), EX)
            # Inheritance across nodes needs the merged graph
            inherit_properties(graph, EX)
            DeductiveClosure(OWLRL_Semantics).expand(graph)
            return graph

        cases = (
            ("correct_analysis", correct_analysis, 0),
            ("wrong_analysis", wrong_analysis, 1),
        )
        for wf_label, analysis, n_missing in cases:
            wf = Workflow(wf_label)
            wf.addition = add(a=1.0, b=2.0)
            wf.multiply = multiply(a=wf.addition, b=3.0)
            wf.analysis = analysis(a=wf.multiply)
            graph = get_graph(wf)
            self.assertEqual(len(validate_values(graph)), n_missing)


# Run the tests of this module when the file is executed as a script
if __name__ == "__main__":
    unittest.main()

0 comments on commit 071ac40

Please sign in to comment.