Skip to content

Commit

Permalink
Merge pull request #1330 from cambridge-cares/dev-twa-mops
Browse files Browse the repository at this point in the history
Dev twa mops
  • Loading branch information
jb2197 authored Dec 19, 2024
2 parents 61dfe53 + 2b36b35 commit a309038
Show file tree
Hide file tree
Showing 31 changed files with 5,105 additions and 152 deletions.
27 changes: 27 additions & 0 deletions JPS_BASE_LIB/python_wrapper/docs/examples/ogm.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ For simplicity, `<https://www.theworldavatar.com/kg/yourontology/>` (**Note the
@prefix yo: <https://www.theworldavatar.com/kg/yourontology/> .
```

> NOTE: if you wish to develop this in a Jupyter notebook, you might find it helpful to set the ontology to development mode using `YourOntology.set_dev_mode()`, which will allow you to re-run a cell after changing your classes/properties without raising a "class already registered" error. Once you are happy with your ontology and wish to switch back to production mode, you may do so via `YourOntology.set_prod_mode()`.

### Define a property (relationship)

To define custom object and data properties, the two base classes `ObjectProperty` and `DatatypeProperty` should be used respectively. It should be noted that the user is only required to specify the cardinality of these properties at the class definition, as their `rdfs:domain` and `rdfs:range` will be automatically handled by the class that utilises the defined properties.
Expand Down Expand Up @@ -347,6 +350,26 @@ another_object_of_one_concept = OneConcept.pull_from_kg(
> NOTE the developer should be aware of the `recursive_depth` that one is using to pull the triples from the knowledge graph.
#### NOTE when pulling instances with multiple `rdf:type` definitions

For instances defined with multiple `rdf:type`, this pulling function instantiates the Python object using the deepest subclass found in the intersection of the subclasses of the calling class and those specified by `rdf:type`. If multiple deepest subclasses coexist (e.g., when subclasses from different branches of the inheritance tree are identified), the code raises an error. To prevent this, you can pull the object directly using the desired subclass.

For a concrete example using the class hierarchy below, assume an instance is defined with `rdf:type` of both class `C` and `E`. Pulling this instance using `A.pull_from_kg()` will result in an error because both `C` and `E` are identified as potential classes for instantiation, but they belong to different branches of the inheritance tree. A workaround is to pull the instance explicitly using either class `C` or `E`. Alternatively, if a class `F` exists as a subclass of both `C` and `E`, pulling the instance with `A.pull_from_kg()` would succeed, as class `F` would be identified as the new "deepest" subclass.

```mermaid
classDiagram
class A
class B
class C
class D
class E
A <|-- B
B <|-- C
A <|-- D
D <|-- E
```

### Update existing objects in triple store

To make changes to the local objects and update it in the triple store:
Expand Down Expand Up @@ -378,3 +401,7 @@ one_concept.revert_local_changes()

- How to generate Python script given an OWL file
- Add support for many-to-many cardinality constraints?
- Mermaid codes
- Type hint for object/datatype properties
- Allocate set or single instances when accessing object/datatype properties
- Handle rdf:type when it's a class
2 changes: 1 addition & 1 deletion JPS_BASE_LIB/python_wrapper/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='twa',
version='0.0.4',
version='0.0.6',
author='Jiaru Bai; Daniel Nurkowski',
author_email='[email protected]; [email protected]',
license='MIT',
Expand Down
2 changes: 1 addition & 1 deletion JPS_BASE_LIB/python_wrapper/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _get_service_url(service_name, url_route):
time.sleep(3)

if not service_available:
raise RuntimeError("Blazegraph service did not become available within the timeout period")
raise RuntimeError(f"Blazegraph service did not become available within the timeout period: {timeout} seconds")

return service_url
return _get_service_url
Expand Down
200 changes: 198 additions & 2 deletions JPS_BASE_LIB/python_wrapper/tests/test_base_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ class E(D):
pass


# Module-level class declared under ExampleOntology. test_dev_mode() declares a
# class with this same name again to check how the duplicate is handled in
# production mode versus development mode.
class For_Dev_Mode_Test(BaseClass):
    rdfs_isDefinedBy = ExampleOntology


# 'HasPart' property built from the TransitiveProperty base; ExampleOntology is
# presumably the ontology it is defined by -- confirm against create_from_base.
HasPart = TransitiveProperty.create_from_base('HasPart', ExampleOntology)


Expand Down Expand Up @@ -104,6 +108,29 @@ def init():
return a1, a2, a3, b, c, d


def test_dev_mode():
    """Check toggling ExampleOntology between production and development mode.

    The test first verifies the mode flag through both the private attribute
    and the public accessor, then checks that re-declaring a class named
    For_Dev_Mode_Test raises ValueError in production mode, and finally
    re-declares it again after switching to development mode.
    """
    # the ontology is expected to start in production mode
    assert not ExampleOntology._dev_mode
    assert not ExampleOntology.is_dev_mode()
    # switching to development mode is visible via both the flag and the accessor
    ExampleOntology.set_dev_mode()
    assert ExampleOntology._dev_mode
    assert ExampleOntology.is_dev_mode()
    # ... and switching back restores production mode
    ExampleOntology.set_prod_mode()
    assert not ExampleOntology._dev_mode
    assert not ExampleOntology.is_dev_mode()
    # in production mode, declaring a class with the same name as the
    # module-level For_Dev_Mode_Test must raise
    with pytest.raises(ValueError) as e_info:
        class For_Dev_Mode_Test(BaseClass):
            rdfs_isDefinedBy = ExampleOntology
    assert e_info.match('https://example.org/example/For_Dev_Mode_Test')
    assert e_info.match('already exists in')
    # the bare string below records a sample of the expected error message
    """
E ValueError: Class with rdf_type https://example.org/example/For_Dev_Mode_Test already exists in
<class 'tests.test_base_ontology.ExampleOntology'>: <class 'tests.test_base_ontology.For_Dev_Mode_Test'>.
"""
    # in development mode the same re-declaration is expected to go through
    # (no exception is asserted here; a raise would simply fail the test)
    # NOTE(review): the ontology is left in development mode when this test
    # returns -- confirm that later tests in this module rely on / tolerate that
    ExampleOntology.set_dev_mode()
    class For_Dev_Mode_Test(BaseClass):
        rdfs_isDefinedBy = ExampleOntology


def test_retrieve_cardinality():
assert DataProperty_A.retrieve_cardinality() == (0, 1)
assert DataProperty_B.retrieve_cardinality() == (1, 1)
Expand Down Expand Up @@ -151,8 +178,8 @@ def test_basics():
a = A(data_property_a={'a'}, rdfs_comment='my comment', rdfs_label='my label')
assert a.data_property_a == {'a'}
assert a.rdfs_isDefinedBy.base_url in a.instance_iri
assert a.rdfs_comment == 'my comment'
assert a.rdfs_label == 'my label'
assert a.rdfs_comment == {'my comment'}
assert a.rdfs_label == {'my label'}
# test one can instantiate with a custom instance_iri
my_random_iri = f'https://{str(uuid.uuid4())}'
a_with_random_iri = A(data_property_a={'a'}, instance_iri=my_random_iri)
Expand Down Expand Up @@ -915,3 +942,172 @@ def test_all_triples_of_nodes():
assert (URIRef(d.instance_iri), URIRef(ObjectProperty_D_A.predicate_iri), URIRef(a1.instance_iri)) in g
# in total 6 triples
assert sum(1 for _ in g.triples((None, None, None))) == 6


def test_cls_rdfs_comment_label():
    """Check that class-level rdfs:comment / rdfs:label values end up in the exported graph.

    A concept class, a datatype property and an object property are each
    declared with the same comments and labels; after exporting all three into
    one graph, every (subject, rdfs:comment|rdfs:label, literal) triple must be
    present.
    """
    comments = ['comment1', 'comment2', 'comment3']
    labels = ['label1', 'label2']

    class TestRdfsCommentLabel(E):
        rdfs_comment_clz = comments
        rdfs_label_clz = labels

    class TestRdfsCommentLabelDataProperty(DatatypeProperty):
        rdfs_isDefinedBy = ExampleOntology
        rdfs_comment_clz = comments
        rdfs_label_clz = labels

    class TestRdfsCommentLabelObjectProperty(ObjectProperty):
        rdfs_isDefinedBy = ExampleOntology
        rdfs_comment_clz = comments
        rdfs_label_clz = labels

    # export the class and both properties into the same graph
    graph = TestRdfsCommentLabel._export_to_owl(Graph())
    graph = TestRdfsCommentLabelDataProperty._export_to_owl(graph, set(), set())
    graph = TestRdfsCommentLabelObjectProperty._export_to_owl(graph, set(), set())

    # the three subjects that must each carry every comment and every label
    subjects = (
        URIRef(TestRdfsCommentLabel.rdf_type),
        URIRef(TestRdfsCommentLabelDataProperty.predicate_iri),
        URIRef(TestRdfsCommentLabelObjectProperty.predicate_iri),
    )
    # rdfs:comment triples first, then rdfs:label triples
    for predicate, texts in ((RDFS.comment, comments), (RDFS.label, labels)):
        for text in texts:
            for subject in subjects:
                assert (subject, URIRef(predicate), Literal(text)) in graph


def test_instances_with_multiple_rdf_type(initialise_sparql_client, recwarn):
    """Check how pull_from_kg picks the Python class for instances with several rdf:type.

    Instances of E, E_Sub (subclass of E) and E_Para (subclass of D, parallel
    to E) are pushed to the triple store, extra rdf:type triples are inserted
    by hand, and the instances are then pulled back through different classes.
    The scenarios covered are numbered "test 1" to "test 7" below.

    Args:
        initialise_sparql_client: fixture whose value is used as the SPARQL client.
        recwarn: pytest fixture recording the warnings raised during the test.
    """
    a1, a2, a3, b, c, d = init()
    # create data property and classes for this test
    Data_Property_E_Sub = DatatypeProperty.create_from_base('Data_Property_E_Sub', ExampleOntology, 0, 2)
    Data_Property_E_Para = DatatypeProperty.create_from_base('Data_Property_Parallel_To_E', ExampleOntology, 0, 2)
    class E_Sub(E):
        # this class is used to test the case that the object is pulled from the KG with the correct level of subclass
        # i.e. if the object is an instance of E but pulled using class D, it should NOT be pulled as E_Sub even though E_Sub is a subclass of E
        data_property_e_sub: Data_Property_E_Sub[str]
    class E_Para(D):
        data_property_e_para: Data_Property_E_Para[str]

    # create objects e, e_sub and e_para and push them to the KG
    INFO_NOT_LOST_FOR_E_SUB = 'this is to test information not lost for e_sub'
    INFO_NOT_LOST_FOR_E_PARA = 'this is to test information not lost for parallel_to_e'
    NEW_INFO_FOR_E_SUB = 'this is to test new information for e_sub'
    NEW_INFO_FOR_E_PARA = 'this is to test new information for parallel_to_e'
    e = E(object_property_d_a=[a1], object_property_d_c=[c])
    e_sub = E_Sub(object_property_d_a=[a1], object_property_d_c=[c], data_property_e_sub=INFO_NOT_LOST_FOR_E_SUB)
    e_para = E_Para(object_property_d_a=[a1], object_property_d_c=[c], data_property_e_para=INFO_NOT_LOST_FOR_E_PARA)
    sparql_client = initialise_sparql_client
    e.push_to_kg(sparql_client, -1)
    e_sub.push_to_kg(sparql_client, -1)
    e_para.push_to_kg(sparql_client, -1)

    # now also insert the triples for additional rdf:type of e and e_sub due to subclassing
    sparql_client.perform_update(f'insert data {{ <{e.instance_iri}> <{RDF.type.toPython()}> <{D.rdf_type}> }}')
    sparql_client.perform_update(f'insert data {{ <{e_sub.instance_iri}> <{RDF.type.toPython()}> <{D.rdf_type}> }}')
    sparql_client.perform_update(f'insert data {{ <{e_sub.instance_iri}> <{RDF.type.toPython()}> <{E.rdf_type}> }}')
    # create additional triples to make e_para also rdf:type of E_Sub, as well as data property for E_Sub
    sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{RDF.type.toPython()}> <{E_Sub.rdf_type}> }}')
    sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{Data_Property_E_Sub.predicate_iri}> "{INFO_NOT_LOST_FOR_E_SUB}" }}')

    # after clearing the ontology object lookup, the object should be pulled from the KG again as a fresh object
    KnowledgeGraph.clear_object_lookup()

    # test 1: pull the object e using D class but it should return as E object
    e_pulled = D.pull_from_kg([e.instance_iri], sparql_client, -1)[0]
    # the pulled object must be a different Python object from the local one
    assert e is not e_pulled
    # the pulled object should be exactly of type E: neither D nor E_Sub
    assert type(e_pulled) is E
    assert type(e_pulled) is not D
    assert type(e_pulled) is not E_Sub

    # test 2: pull the object e using E_Sub class which should raise error
    with pytest.raises(ValueError) as e_info:
        E_Sub.pull_from_kg([e.instance_iri], sparql_client, -1)
    # patterns are regular expressions, hence raw strings wherever a backslash escape is used
    assert e_info.match(f"""The instance {e.instance_iri} is of type """)
    assert e_info.match(f"""{E.rdf_type}""")
    assert e_info.match(f"""{D.rdf_type}""")
    assert e_info.match(rf"""it doesn't match the rdf:type of class {E_Sub.__name__} \({E_Sub.rdf_type}\)""")

    # test 3: pull the object e_sub using D class which should return as E_Sub object
    e_sub_pulled = D.pull_from_kg([e_sub.instance_iri], sparql_client, -1)[0]
    # the pulled object must be a different Python object from the local one
    assert e_sub is not e_sub_pulled
    # the pulled object should be exactly of type E_Sub: neither D nor E
    assert type(e_sub_pulled) is E_Sub
    assert type(e_sub_pulled) is not D
    assert type(e_sub_pulled) is not E
    # the information should be preserved
    assert e_sub_pulled.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB}

    # test 4: pull the object e_para using D class should throw an error as there's no subclass relation between E_Sub and E_Para
    with pytest.raises(ValueError) as e_info:
        D.pull_from_kg([e_para.instance_iri], sparql_client, -1)
    assert e_info.match(f"""The instance {e_para.instance_iri} is of type """)
    assert e_info.match(rf"""Amongst the pulling class {D.__name__} \({D.rdf_type}\)""")
    assert e_info.match(rf"""and its subclasses \({D.construct_subclass_dictionary()}\)""")
    assert e_info.match(f"""{E_Sub.rdf_type}""")
    assert e_info.match(f"""{E_Para.rdf_type}""")
    assert e_info.match("""there exist classes that are not in the same branch of the inheritance tree""")
    assert e_info.match("""please check the inheritance tree is correctly defined in Python""")

    # test 5: pull the object e_para using E_Sub class should return as E_Sub object
    e_para_pulled_as_e_sub = E_Sub.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0]
    # the pulled object must be a different Python object from the local one
    assert e_para is not e_para_pulled_as_e_sub
    # the pulled object should be exactly of type E_Sub: neither E_Para nor D
    assert type(e_para_pulled_as_e_sub) is E_Sub
    assert type(e_para_pulled_as_e_sub) is not E_Para
    assert type(e_para_pulled_as_e_sub) is not D
    # the information should be preserved
    assert e_para_pulled_as_e_sub.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB}
    # if I now change the data property of E_Sub, it should not affect the data property of E_Para which is not pulled as part of e_para_pulled_as_e_sub
    e_para_pulled_as_e_sub.data_property_e_sub.add(NEW_INFO_FOR_E_SUB)
    e_para_pulled_as_e_sub.push_to_kg(sparql_client, -1)
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, NEW_INFO_FOR_E_SUB, XSD.string.toPython())
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, INFO_NOT_LOST_FOR_E_SUB, XSD.string.toPython())
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, INFO_NOT_LOST_FOR_E_PARA, XSD.string.toPython())

    # test 6: pull the object e_para using E_Para class should return as E_Para object
    e_para_pulled_as_e_para = E_Para.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0]
    # the pulled object must be a different Python object from the local one
    assert e_para is not e_para_pulled_as_e_para
    # the pulled object should be exactly of type E_Para: neither E_Sub nor D
    assert type(e_para_pulled_as_e_para) is E_Para
    assert type(e_para_pulled_as_e_para) is not E_Sub
    assert type(e_para_pulled_as_e_para) is not D
    # the information should be preserved
    assert e_para_pulled_as_e_para.data_property_e_para == {INFO_NOT_LOST_FOR_E_PARA}
    # if I now change the data property of E_Para, it should not affect the data property of E_Sub which is not pulled as part of e_para_pulled_as_e_para
    e_para_pulled_as_e_para.data_property_e_para.add(NEW_INFO_FOR_E_PARA)
    e_para_pulled_as_e_para.push_to_kg(sparql_client, -1)
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, NEW_INFO_FOR_E_PARA, XSD.string.toPython())
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, INFO_NOT_LOST_FOR_E_PARA, XSD.string.toPython())
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, INFO_NOT_LOST_FOR_E_SUB, XSD.string.toPython())
    assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, NEW_INFO_FOR_E_SUB, XSD.string.toPython())

    # test 7: create a new class and make it subclass of E_Sub and E_Para, then pulling it with D class should return as the new class object
    class New_E_Super_Sub(E_Para, E_Sub):
        pass
    # make the object e_para also rdf:type of New_E_Super_Sub
    sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{RDF.type.toPython()}> <{New_E_Super_Sub.rdf_type}> }}')
    e_super_sub = D.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0]
    # the pulled object must be a different Python object from the local one
    assert e_para is not e_super_sub
    # the pulled object should be exactly of type New_E_Super_Sub: not E_Sub, E_Para nor D
    assert type(e_super_sub) is New_E_Super_Sub
    assert type(e_super_sub) is not E_Sub
    assert type(e_super_sub) is not E_Para
    assert type(e_super_sub) is not D
    # the information should be preserved
    assert e_super_sub.data_property_e_para == {INFO_NOT_LOST_FOR_E_PARA, NEW_INFO_FOR_E_PARA}
    assert e_super_sub.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB, NEW_INFO_FOR_E_SUB}

    # final check: all the warning messages when overwriting the pulled object in the registry
    # NOTE: the spelling "regiatration" is part of the asserted warning text; do not "fix" it here
    # without changing the message that is being matched
    assert len(recwarn) == 2
    warning_message_1 = str(recwarn[0].message)
    assert f"""An object with the same IRI {e_para.instance_iri} has already been instantiated and registered with type {E_Sub}. Replacing its regiatration now with type {E_Para}.""" in warning_message_1
    warning_message_2 = str(recwarn[1].message)
    assert f"""An object with the same IRI {e_para.instance_iri} has already been instantiated and registered with type {E_Para}. Replacing its regiatration now with type {New_E_Super_Sub}.""" in warning_message_2
2 changes: 1 addition & 1 deletion JPS_BASE_LIB/python_wrapper/twa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from twa.JPSGateway import JPSGateway

__version__ = "0.0.4"
__version__ = "0.0.6"
Loading

0 comments on commit a309038

Please sign in to comment.