diff --git a/JPS_BASE_LIB/python_wrapper/docs/examples/ogm.md b/JPS_BASE_LIB/python_wrapper/docs/examples/ogm.md index 72ceed32c71..d76aef002a7 100644 --- a/JPS_BASE_LIB/python_wrapper/docs/examples/ogm.md +++ b/JPS_BASE_LIB/python_wrapper/docs/examples/ogm.md @@ -51,6 +51,9 @@ For simplicity, `` (**Note the @prefix yo: . ``` +> NOTE: If you wish to develop this in a Jupyter notebook, you might find it helpful to set the ontology to development mode using `YourOntology.set_dev_mode()`, which will allow you to re-run the cell after you have made changes to your classes/properties without throwing a "class already registered" error. Once you are happy with your ontology and wish to switch back to production mode, you may do so via `YourOntology.set_prod_mode()`. + + ### Define a property (relationship) To define custom object and data properties, the two base classes `ObjectProperty` and `DatatypeProperty` should be used respectively. It should be noted that the user is only required to specify the cardinality of these properties at the class definition, as their `rdfs:domain` and `rdfs:range` will be automatically handled by the class that utilises the defined properties. @@ -347,6 +350,26 @@ another_object_of_one_concept = OneConcept.pull_from_kg( > NOTE the developer should be aware of the `recursive_depth` that one is using to pull the triples from the knowledge graph. +#### NOTE when pulling instances with multiple `rdf:type` definitions + +For instances defined with multiple `rdf:type`, this pulling function instantiates the Python object using the deepest subclass found in the intersection of the subclasses of the calling class and those specified by `rdf:type`. If multiple deepest subclasses coexist (e.g., when subclasses from different branches of the inheritance tree are identified), the code raises an error. To prevent this, you can pull the object directly using the desired subclass. + +For a concrete example using the class hierarchy below, assume an instance is defined with `rdf:type` of both classes `C` and `E`. Pulling this instance using `A.pull_from_kg()` will result in an error because both `C` and `E` are identified as potential classes for instantiation, but they belong to different branches of the inheritance tree. A workaround is to pull the instance explicitly using either class `C` or `E`. Alternatively, if a class `F` exists as a subclass of both `C` and `E`, pulling the instance with `A.pull_from_kg()` would succeed, as class `F` would be identified as the new "deepest" subclass. +

```mermaid
classDiagram
    class A
    class B
    class C
    class D
    class E

    A <|-- B
    B <|-- C
    A <|-- D
    D <|-- E
```
 + ### Update existing objects in triple store To make changes to the local objects and update it in the triple store: @@ -378,3 +401,7 @@ one_concept.revert_local_changes() - How to generate Python script given an OWL file - Add support for many-to-many cardinality constraints? 
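To make the "multiple `rdf:type`" note above concrete, here is a minimal sketch (not part of this changeset). It assumes `YourOntology` is the ontology class defined earlier in this tutorial, `sparql_client` is an already-initialised SPARQL client, and `iri` identifies an instance that carries both `rdf:type` `C` and `rdf:type` `E` in the triple store; the `pull_from_kg(iris, sparql_client, recursive_depth)` signature follows the tests added in this change.

```python
from twa.data_model.base_ontology import BaseClass

# class hierarchy mirroring the Mermaid diagram above
class A(BaseClass):
    rdfs_isDefinedBy = YourOntology  # assumed to be defined earlier in the tutorial

class B(A):
    pass

class C(B):
    pass

class D(A):
    pass

class E(D):
    pass

# C and E sit on different branches below A, so pulling with A is ambiguous
try:
    A.pull_from_kg([iri], sparql_client, -1)
except ValueError:
    pass  # "... not in the same branch of the inheritance tree ..."

# pulling directly with one of the deepest classes succeeds
obj = C.pull_from_kg([iri], sparql_client, -1)[0]
assert type(obj) is C

# defining F as a subclass of both C and E (and typing the instance with F in the KG)
# would make A.pull_from_kg() succeed and return an F instance
```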
+- Mermaid codes +- Type hint for object/datatype properties +- Allocate set or single instances when accessing object/datatype properties +- Handle rdf:type when it's a class diff --git a/JPS_BASE_LIB/python_wrapper/setup.py b/JPS_BASE_LIB/python_wrapper/setup.py index 951b965c235..4ca602757c3 100644 --- a/JPS_BASE_LIB/python_wrapper/setup.py +++ b/JPS_BASE_LIB/python_wrapper/setup.py @@ -2,7 +2,7 @@ setup( name='twa', - version='0.0.4', + version='0.0.6', author='Jiaru Bai; Daniel Nurkowski', author_email='jb2197@cam.ac.uk; danieln@cmclinnovations.com', license='MIT', diff --git a/JPS_BASE_LIB/python_wrapper/tests/conftest.py b/JPS_BASE_LIB/python_wrapper/tests/conftest.py index 4749a805a06..46cd9ac1ad4 100644 --- a/JPS_BASE_LIB/python_wrapper/tests/conftest.py +++ b/JPS_BASE_LIB/python_wrapper/tests/conftest.py @@ -186,7 +186,7 @@ def _get_service_url(service_name, url_route): time.sleep(3) if not service_available: - raise RuntimeError("Blazegraph service did not become available within the timeout period") + raise RuntimeError(f"Blazegraph service did not become available within the timeout period: {timeout} seconds") return service_url return _get_service_url diff --git a/JPS_BASE_LIB/python_wrapper/tests/test_base_ontology.py b/JPS_BASE_LIB/python_wrapper/tests/test_base_ontology.py index 766324feda3..231bb7a1450 100644 --- a/JPS_BASE_LIB/python_wrapper/tests/test_base_ontology.py +++ b/JPS_BASE_LIB/python_wrapper/tests/test_base_ontology.py @@ -77,6 +77,10 @@ class E(D): pass +class For_Dev_Mode_Test(BaseClass): + rdfs_isDefinedBy = ExampleOntology + + HasPart = TransitiveProperty.create_from_base('HasPart', ExampleOntology) @@ -104,6 +108,29 @@ def init(): return a1, a2, a3, b, c, d +def test_dev_mode(): + assert not ExampleOntology._dev_mode + assert not ExampleOntology.is_dev_mode() + ExampleOntology.set_dev_mode() + assert ExampleOntology._dev_mode + assert ExampleOntology.is_dev_mode() + ExampleOntology.set_prod_mode() + assert not ExampleOntology._dev_mode + assert not ExampleOntology.is_dev_mode() + with pytest.raises(ValueError) as e_info: + class For_Dev_Mode_Test(BaseClass): + rdfs_isDefinedBy = ExampleOntology + assert e_info.match('https://example.org/example/For_Dev_Mode_Test') + assert e_info.match('already exists in') + """ + E ValueError: Class with rdf_type https://example.org/example/For_Dev_Mode_Test already exists in + : . 
+ """ + ExampleOntology.set_dev_mode() + class For_Dev_Mode_Test(BaseClass): + rdfs_isDefinedBy = ExampleOntology + + def test_retrieve_cardinality(): assert DataProperty_A.retrieve_cardinality() == (0, 1) assert DataProperty_B.retrieve_cardinality() == (1, 1) @@ -151,8 +178,8 @@ def test_basics(): a = A(data_property_a={'a'}, rdfs_comment='my comment', rdfs_label='my label') assert a.data_property_a == {'a'} assert a.rdfs_isDefinedBy.base_url in a.instance_iri - assert a.rdfs_comment == 'my comment' - assert a.rdfs_label == 'my label' + assert a.rdfs_comment == {'my comment'} + assert a.rdfs_label == {'my label'} # test one can instantiate with a custom instance_iri my_random_iri = f'https://{str(uuid.uuid4())}' a_with_random_iri = A(data_property_a={'a'}, instance_iri=my_random_iri) @@ -915,3 +942,172 @@ def test_all_triples_of_nodes(): assert (URIRef(d.instance_iri), URIRef(ObjectProperty_D_A.predicate_iri), URIRef(a1.instance_iri)) in g # in total 6 triples assert sum(1 for _ in g.triples((None, None, None))) == 6 + + +def test_cls_rdfs_comment_label(): + comments = ['comment1', 'comment2', 'comment3'] + labels = ['label1', 'label2'] + + class TestRdfsCommentLabel(E): + rdfs_comment_clz = comments + rdfs_label_clz = labels + + class TestRdfsCommentLabelDataProperty(DatatypeProperty): + rdfs_isDefinedBy = ExampleOntology + rdfs_comment_clz = comments + rdfs_label_clz = labels + + class TestRdfsCommentLabelObjectProperty(ObjectProperty): + rdfs_isDefinedBy = ExampleOntology + rdfs_comment_clz = comments + rdfs_label_clz = labels + + g = Graph() + g = TestRdfsCommentLabel._export_to_owl(g) + g = TestRdfsCommentLabelDataProperty._export_to_owl(g, set(), set()) + g = TestRdfsCommentLabelObjectProperty._export_to_owl(g, set(), set()) + # rdfs:comment triple + for comment in comments: + assert (URIRef(TestRdfsCommentLabel.rdf_type), URIRef(RDFS.comment), Literal(comment)) in g + assert (URIRef(TestRdfsCommentLabelDataProperty.predicate_iri), URIRef(RDFS.comment), Literal(comment)) in g + assert (URIRef(TestRdfsCommentLabelObjectProperty.predicate_iri), URIRef(RDFS.comment), Literal(comment)) in g + # rdfs:label triple + for label in labels: + assert (URIRef(TestRdfsCommentLabel.rdf_type), URIRef(RDFS.label), Literal(label)) in g + assert (URIRef(TestRdfsCommentLabelDataProperty.predicate_iri), URIRef(RDFS.label), Literal(label)) in g + assert (URIRef(TestRdfsCommentLabelObjectProperty.predicate_iri), URIRef(RDFS.label), Literal(label)) in g + + +def test_instances_with_multiple_rdf_type(initialise_sparql_client, recwarn): + a1, a2, a3, b, c, d = init() + # create data property and classes for this test + Data_Property_E_Sub = DatatypeProperty.create_from_base('Data_Property_E_Sub', ExampleOntology, 0, 2) + Data_Property_E_Para = DatatypeProperty.create_from_base('Data_Property_Parallel_To_E', ExampleOntology, 0, 2) + class E_Sub(E): + # this class is used to test the case that the object is pulled from the KG with the correct level of subclass + # i.e. 
if the object is instance of E but pulled using class D, it should NOT be pulled as E_Sub even though E_Sub is a subclass of E + data_property_e_sub: Data_Property_E_Sub[str] + class E_Para(D): + data_property_e_para: Data_Property_E_Para[str] + + # create objects e, e_sub and e_para and push them to the KG + INFO_NOT_LOST_FOR_E_SUB = 'this is to test information not lost for e_sub' + INFO_NOT_LOST_FOR_E_PARA = 'this is to test information not lost for parallel_to_e' + NEW_INFO_FOR_E_SUB = 'this is to test new information for e_sub' + NEW_INFO_FOR_E_PARA = 'this is to test new information for parallel_to_e' + e = E(object_property_d_a=[a1], object_property_d_c=[c]) + e_sub = E_Sub(object_property_d_a=[a1], object_property_d_c=[c], data_property_e_sub=INFO_NOT_LOST_FOR_E_SUB) + e_para = E_Para(object_property_d_a=[a1], object_property_d_c=[c], data_property_e_para=INFO_NOT_LOST_FOR_E_PARA) + sparql_client = initialise_sparql_client + e.push_to_kg(sparql_client, -1) + e_sub.push_to_kg(sparql_client, -1) + e_para.push_to_kg(sparql_client, -1) + + # now also insert the triples for additional rdf:type of e and e_sub due to subclassing + sparql_client.perform_update(f'insert data {{ <{e.instance_iri}> <{RDF.type.toPython()}> <{D.rdf_type}> }}') + sparql_client.perform_update(f'insert data {{ <{e_sub.instance_iri}> <{RDF.type.toPython()}> <{D.rdf_type}> }}') + sparql_client.perform_update(f'insert data {{ <{e_sub.instance_iri}> <{RDF.type.toPython()}> <{E.rdf_type}> }}') + # create additional triples to make e_para also rdf:type of E_Sub, as well as data property for E_Sub + sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{RDF.type.toPython()}> <{E_Sub.rdf_type}> }}') + sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{Data_Property_E_Sub.predicate_iri}> "{INFO_NOT_LOST_FOR_E_SUB}" }}') + + # after clearing the ontology object lookup, the object should be pulled from the KG again as a fresh object + KnowledgeGraph.clear_object_lookup() + + # test 1: pull the object e using D class but it should return as E object + e_pulled = D.pull_from_kg([e.instance_iri], sparql_client, -1)[0] + # the id of the object should be different, meaning it's a different object + assert id(e) != id(e_pulled) + # the pulled object should also be instance of E, but not D + assert type(e_pulled) is E + assert type(e_pulled) is not D + assert type(e_pulled) is not E_Sub + + # test 2: pull the object e using E_Sub class which should raise an error + with pytest.raises(ValueError) as e_info: + E_Sub.pull_from_kg([e.instance_iri], sparql_client, -1) + assert e_info.match(f"""The instance {e.instance_iri} is of type """) + assert e_info.match(f"""{E.rdf_type}""") + assert e_info.match(f"""{D.rdf_type}""") + assert e_info.match(f"""it doesn't match the rdf:type of class {E_Sub.__name__} \({E_Sub.rdf_type}\)""") + + # test 3: pull the object e_sub using D class which should return as E_Sub object + e_sub_pulled = D.pull_from_kg([e_sub.instance_iri], sparql_client, -1)[0] + # the id of the object should be different, meaning it's a different object + assert id(e_sub) != id(e_sub_pulled) + # the pulled object should also be instance of E_Sub, but not D + assert type(e_sub_pulled) is E_Sub + assert type(e_sub_pulled) is not D + assert type(e_sub_pulled) is not E + # the information should be preserved + assert e_sub_pulled.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB} + + # test 4: pull the object e_para using D class should throw an error as there's no subclass relation between E_Sub and E_Para + with 
pytest.raises(ValueError) as e_info: + D.pull_from_kg([e_para.instance_iri], sparql_client, -1) + assert e_info.match(f"""The instance {e_para.instance_iri} is of type """) + assert e_info.match(f"""Amongst the pulling class {D.__name__} \({D.rdf_type}\)""") + assert e_info.match(f"""and its subclasses \({D.construct_subclass_dictionary()}\)""") + assert e_info.match(f"""{E_Sub.rdf_type}""") + assert e_info.match(f"""{E_Para.rdf_type}""") + assert e_info.match(f"""there exist classes that are not in the same branch of the inheritance tree""") + assert e_info.match(f"""please check the inheritance tree is correctly defined in Python""") + + # test 5: pull the object e_para using E_Sub class should return as E_Sub object + e_para_pulled_as_e_sub = E_Sub.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0] + # the id of the object should be different, meaning it's a different object + assert id(e_para) != id(e_para_pulled_as_e_sub) + # the pulled object should also be instance of E_Sub, but not E_Para + assert type(e_para_pulled_as_e_sub) is E_Sub + assert type(e_para_pulled_as_e_sub) is not E_Para + assert type(e_para_pulled_as_e_sub) is not D + # the information should be preserved + assert e_para_pulled_as_e_sub.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB} + # if I now change the data property of E_Sub, it should not affect the data property of E_Para which is not pulled as part of e_para_pulled_as_e_sub + e_para_pulled_as_e_sub.data_property_e_sub.add(NEW_INFO_FOR_E_SUB) + e_para_pulled_as_e_sub.push_to_kg(sparql_client, -1) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, NEW_INFO_FOR_E_SUB, XSD.string.toPython()) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, INFO_NOT_LOST_FOR_E_SUB, XSD.string.toPython()) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, INFO_NOT_LOST_FOR_E_PARA, XSD.string.toPython()) + + # test 6: pull the object e_para using E_Para class should return as E_Para object + e_para_pulled_as_e_para = E_Para.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0] + # the id of the object should be different, meaning it's a different object + assert id(e_para) != id(e_para_pulled_as_e_para) + # the pulled object should also be instance of E_Para, but not E_Sub + assert type(e_para_pulled_as_e_para) is E_Para + assert type(e_para_pulled_as_e_para) is not E_Sub + assert type(e_para_pulled_as_e_para) is not D + # the information should be preserved + assert e_para_pulled_as_e_para.data_property_e_para == {INFO_NOT_LOST_FOR_E_PARA} + # if I now change the data property of E_Para, it should not affect the data property of E_Sub which is not pulled as part of e_para_pulled_as_e_para + e_para_pulled_as_e_para.data_property_e_para.add(NEW_INFO_FOR_E_PARA) + e_para_pulled_as_e_para.push_to_kg(sparql_client, -1) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, NEW_INFO_FOR_E_PARA, XSD.string.toPython()) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Para.predicate_iri, INFO_NOT_LOST_FOR_E_PARA, XSD.string.toPython()) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, INFO_NOT_LOST_FOR_E_SUB, XSD.string.toPython()) + assert sparql_client.check_if_triple_exist(e_para.instance_iri, Data_Property_E_Sub.predicate_iri, NEW_INFO_FOR_E_SUB, XSD.string.toPython()) + + # test 7: create a new 
class and make it subclass of E_Sub and E_Para, then pulling it with D class should return as the new class object + class New_E_Super_Sub(E_Para, E_Sub): + pass + # make the object e_para also rdf:type of New_E_Super_Sub + sparql_client.perform_update(f'insert data {{ <{e_para.instance_iri}> <{RDF.type.toPython()}> <{New_E_Super_Sub.rdf_type}> }}') + e_super_sub = D.pull_from_kg([e_para.instance_iri], sparql_client, -1)[0] + # the id of the object should be different, meaning it's a different object + assert id(e_para) != id(e_super_sub) + # the pulled object should also be instance of New_E_Super_Sub, but not E_Sub nor E_Para + assert type(e_super_sub) is New_E_Super_Sub + assert type(e_super_sub) is not E_Sub + assert type(e_super_sub) is not E_Para + assert type(e_super_sub) is not D + # the information should be preserved + assert e_super_sub.data_property_e_para == {INFO_NOT_LOST_FOR_E_PARA, NEW_INFO_FOR_E_PARA} + assert e_super_sub.data_property_e_sub == {INFO_NOT_LOST_FOR_E_SUB, NEW_INFO_FOR_E_SUB} + + # final check: all the warning messages when overiwritting the pulled object in the registry + assert len(recwarn) == 2 + warning_message_1 = str(recwarn[0].message) + assert f"""An object with the same IRI {e_para.instance_iri} has already been instantiated and registered with type {E_Sub}. Replacing its regiatration now with type {E_Para}.""" in warning_message_1 + warning_message_2 = str(recwarn[1].message) + assert f"""An object with the same IRI {e_para.instance_iri} has already been instantiated and registered with type {E_Para}. Replacing its regiatration now with type {New_E_Super_Sub}.""" in warning_message_2 diff --git a/JPS_BASE_LIB/python_wrapper/twa/__init__.py b/JPS_BASE_LIB/python_wrapper/twa/__init__.py index c9395dd36b4..e253d541ad9 100644 --- a/JPS_BASE_LIB/python_wrapper/twa/__init__.py +++ b/JPS_BASE_LIB/python_wrapper/twa/__init__.py @@ -1,3 +1,3 @@ from twa.JPSGateway import JPSGateway -__version__ = "0.0.4" +__version__ = "0.0.6" diff --git a/JPS_BASE_LIB/python_wrapper/twa/data_model/base_ontology.py b/JPS_BASE_LIB/python_wrapper/twa/data_model/base_ontology.py index 2c4b9584a62..9ef4e927bc5 100644 --- a/JPS_BASE_LIB/python_wrapper/twa/data_model/base_ontology.py +++ b/JPS_BASE_LIB/python_wrapper/twa/data_model/base_ontology.py @@ -170,7 +170,7 @@ def _register_ontology(cls, ontology: BaseOntology): cls.ontology_lookup[ontology.namespace_iri] = ontology @classmethod - def _register_class(cls, ontology_class: BaseClass): + def _register_class(cls, ontology_class: BaseClass, dev_mode: bool = False): """ This method registers a BaseClass (the Pydantic class itself) to the knowledge graph in Python memory. @@ -180,12 +180,12 @@ def _register_class(cls, ontology_class: BaseClass): if cls.class_lookup is None: cls.class_lookup = {} - if ontology_class.rdf_type in cls.class_lookup: + if ontology_class.rdf_type in cls.class_lookup and not dev_mode: raise ValueError(f'Class with rdf_type {ontology_class.rdf_type} already exists in the knowledge graph: {cls.class_lookup[ontology_class.rdf_type]}.') cls.class_lookup[ontology_class.rdf_type] = ontology_class @classmethod - def _register_property(cls, prop: BaseProperty): + def _register_property(cls, prop: BaseProperty, dev_mode: bool = False): """ This method registers a BaseProperty (the Pydantic class itself) to the knowledge graph in Python memory. 
@@ -195,7 +195,7 @@ def _register_property(cls, prop: BaseProperty): if cls.property_lookup is None: cls.property_lookup = {} - if prop.predicate_iri in cls.property_lookup: + if prop.predicate_iri in cls.property_lookup and not dev_mode: raise ValueError(f'Property with predicate IRI {prop.predicate_iri} already exists in the knowledge graph: {cls.property_lookup[prop.predicate_iri]}.') cls.property_lookup[prop.predicate_iri] = prop @@ -260,8 +260,9 @@ class BaseOntology(BaseModel): class_lookup: ClassVar[Dict[str, BaseClass]] = None object_property_lookup: ClassVar[Dict[str, ObjectProperty]] = None data_property_lookup: ClassVar[Dict[str, DatatypeProperty]] = None - rdfs_comment: ClassVar[str] = None + rdfs_comment: ClassVar[Set[str]] = None owl_versionInfo: ClassVar[str] = None + _dev_mode: ClassVar[bool] = False @classmethod def __pydantic_init_subclass__(cls, **kwargs): @@ -276,6 +277,21 @@ def __pydantic_init_subclass__(cls, **kwargs): # register the ontology to the knowledge graph KnowledgeGraph._register_ontology(cls) + @classmethod + def is_dev_mode(cls): + """This method returns whether the KnowledgeGraph is in development mode.""" + return cls._dev_mode + + @classmethod + def set_dev_mode(cls): + """This method sets the KnowledgeGraph to development mode, where duplicate class or property registration will be allowed that the existing ones will be overwritten.""" + cls._dev_mode = True + + @classmethod + def set_prod_mode(cls): + """This method sets the KnowledgeGraph to production mode, where duplicate class or property registration will raise an error.""" + cls._dev_mode = False + @classmethod def _register_class(cls, ontolgy_class: BaseClass): """ @@ -289,12 +305,12 @@ def _register_class(cls, ontolgy_class: BaseClass): if cls.class_lookup is None: cls.class_lookup = {} - if ontolgy_class.rdf_type in cls.class_lookup: + if ontolgy_class.rdf_type in cls.class_lookup and not cls.is_dev_mode(): raise ValueError(f'Class with rdf_type {ontolgy_class.rdf_type} already exists in {cls}: {cls.class_lookup[ontolgy_class.rdf_type]}.') cls.class_lookup[ontolgy_class.rdf_type] = ontolgy_class # also register with knowledge graph - KnowledgeGraph._register_class(ontolgy_class) + KnowledgeGraph._register_class(ontolgy_class, cls.is_dev_mode()) @classmethod def _register_object_property(cls, prop: ObjectProperty): @@ -309,12 +325,12 @@ def _register_object_property(cls, prop: ObjectProperty): if cls.object_property_lookup is None: cls.object_property_lookup = {} - if prop.predicate_iri in cls.object_property_lookup: + if prop.predicate_iri in cls.object_property_lookup and not cls.is_dev_mode(): raise ValueError(f'Object property with predicate IRI {prop.predicate_iri} already exists in {cls}: {cls.object_property_lookup[prop.predicate_iri]}.') cls.object_property_lookup[prop.predicate_iri] = prop # also register with knowledge graph - KnowledgeGraph._register_property(prop) + KnowledgeGraph._register_property(prop, cls.is_dev_mode()) @classmethod def _register_data_property(cls, prop: DatatypeProperty): @@ -329,12 +345,12 @@ def _register_data_property(cls, prop: DatatypeProperty): if cls.data_property_lookup is None: cls.data_property_lookup = {} - if prop.predicate_iri in cls.data_property_lookup: + if prop.predicate_iri in cls.data_property_lookup and not cls.is_dev_mode(): raise ValueError(f'Data property with predicate IRI {prop.predicate_iri} already exists in {cls}: {cls.data_property_lookup[prop.predicate_iri]}.') cls.data_property_lookup[prop.predicate_iri] = prop # also register 
with knowledge graph - KnowledgeGraph._register_property(prop) + KnowledgeGraph._register_property(prop, cls.is_dev_mode()) @classmethod def export_to_graph(cls, g: Graph = None) -> Graph: @@ -351,7 +367,11 @@ def export_to_graph(cls, g: Graph = None) -> Graph: g.add((URIRef(cls.namespace_iri), RDF.type, OWL.Ontology)) g.add((URIRef(cls.namespace_iri), DC.date, Literal(datetime.now().isoformat()))) if bool(cls.rdfs_comment): - g.add((URIRef(cls.namespace_iri), RDFS.comment, Literal(cls.rdfs_comment))) + if isinstance(cls.rdfs_comment, str): + g.add((URIRef(cls.namespace_iri), RDFS.comment, Literal(cls.rdfs_comment))) + elif isinstance(cls.rdfs_comment, set): + for comment in cls.rdfs_comment: + g.add((URIRef(cls.namespace_iri), RDFS.comment, Literal(comment))) if bool(cls.owl_versionInfo): g.add((URIRef(cls.namespace_iri), OWL.versionInfo, Literal(cls.owl_versionInfo))) # handle all classes @@ -426,6 +446,8 @@ class BaseProperty(set, Generic[T]): """ rdfs_isDefinedBy: ClassVar[Type[BaseOntology]] = None predicate_iri: ClassVar[str] = None + rdfs_comment_clz: ClassVar[Set[str]] = None + rdfs_label_clz: ClassVar[Set[str]] = None owl_minQualifiedCardinality: ClassVar[int] = 0 owl_maxQualifiedCardinality: ClassVar[int] = None @@ -513,6 +535,13 @@ def _export_to_owl( idx = cls.__mro__.index(DatatypeProperty) for i in range(1, idx): g.add((URIRef(property_iri), RDFS.subPropertyOf, URIRef(cls.__mro__[i].predicate_iri))) + # add rdfs_comment_clz and rdfs_label_clz for class + if bool(cls.rdfs_comment_clz): + for comment in cls.rdfs_comment_clz: + g.add((URIRef(property_iri), RDFS.comment, Literal(comment))) + if bool(cls.rdfs_label_clz): + for label in cls.rdfs_label_clz: + g.add((URIRef(property_iri), RDFS.label, Literal(label))) # add domain if len(rdfs_domain) == 0: # it is possible that a property is defined without specifying its domain, so we only print a warning @@ -704,8 +733,10 @@ class MyClass(BaseClass): rdf_type: ClassVar[str] = OWL_BASE_URL + 'Class' """ > NOTE rdf_type is the automatically generated IRI of the class which can also be accessed at the instance level. 
""" object_lookup: ClassVar[Dict[str, BaseClass]] = None - rdfs_comment: Optional[str] = Field(default=None) - rdfs_label: Optional[str] = Field(default=None) + rdfs_comment_clz: ClassVar[Set[str]] = None + rdfs_label_clz: ClassVar[Set[str]] = None + rdfs_comment: Optional[Set[str]] = Field(default_factory=set) + rdfs_label: Optional[Set[str]] = Field(default_factory=set) instance_iri: str = Field(default='') # format of the cache for all properties: {property_name: property_object} _latest_cache: Dict[str, Any] = PrivateAttr(default_factory=dict) @@ -732,6 +763,24 @@ def __pydantic_init_subclass__(cls, **kwargs): # register the class to the ontology cls.rdfs_isDefinedBy._register_class(cls) + @classmethod + def init_instance_iri(cls) -> str: + return init_instance_iri(cls.rdfs_isDefinedBy.namespace_iri, cls.__name__) + + def __init__(self, **data): + # handle the case when rdfs_comment and rdfs_label are provided as a non-set value + if 'rdfs_comment' in data and not isinstance(data['rdfs_comment'], set): + if isinstance(data['rdfs_comment'], list): + data['rdfs_comment'] = set(data['rdfs_comment']) + else: + data['rdfs_comment'] = {data['rdfs_comment']} + if 'rdfs_label' in data and not isinstance(data['rdfs_label'], set): + if isinstance(data['rdfs_label'], list): + data['rdfs_label'] = set(data['rdfs_label']) + else: + data['rdfs_label'] = {data['rdfs_label']} + super().__init__(**data) + def model_post_init(self, __context: Any) -> None: """ The post init process of the BaseClass. @@ -745,10 +794,7 @@ def model_post_init(self, __context: Any) -> None: None: It calls the super().model_post_init(__context) to finish the post init process """ if not bool(self.instance_iri): - self.instance_iri = init_instance_iri( - self.__class__.rdfs_isDefinedBy.namespace_iri, - self.__class__.__name__ - ) + self.instance_iri = self.__class__.init_instance_iri() # set new instance to the global look up table, so that we can avoid creating the same instance multiple times self._register_object() return super().model_post_init(__context) @@ -764,8 +810,13 @@ def _register_object(self): if self.__class__.object_lookup is None: self.__class__.object_lookup = {} if self.instance_iri in self.__class__.object_lookup: - raise ValueError( - f"An object with the same IRI {self.instance_iri} has already been registered.") + if type(self.__class__.object_lookup[self.instance_iri]) == type(self): + # TODO and not self.__class__.rdfs_isDefinedBy.is_dev_mode()? + raise ValueError( + f"An object with the same IRI {self.instance_iri} has already been instantiated and registered with the same type {type(self)}.") + else: + warnings.warn(f"An object with the same IRI {self.instance_iri} has already been instantiated and registered with type {type(self.__class__.object_lookup[self.instance_iri])}. 
Replacing its regiatration now with type {type(self)}.") + del self.__class__.object_lookup[self.instance_iri] self.__class__.object_lookup[self.instance_iri] = self @classmethod @@ -820,6 +871,7 @@ def push_all_instances_to_kg( for obj in cls.object_lookup.values(): g_to_remove, g_to_add = obj._collect_diff_to_graph(g_to_remove, g_to_add, recursive_depth) sparql_client.delete_and_insert_graphs(g_to_remove, g_to_add) + return g_to_remove, g_to_add @classmethod def clear_object_lookup(cls): @@ -884,13 +936,49 @@ def pull_from_kg( for iri, props in node_dct.items(): # TODO optimise the time complexity of the following code when the number of instances is large # check if the rdf:type of the instance matches the calling class or any of its subclasses - target_clz_rdf_type = list(props[RDF.type.toPython()])[0] - if target_clz_rdf_type != cls.rdf_type and target_clz_rdf_type not in cls.construct_subclass_dictionary().keys(): + target_clz_rdf_types = set(props.get(RDF.type.toPython(), [])) # NOTE this supports instance instantiated with multiple rdf:type + if not target_clz_rdf_types: + raise ValueError(f"The instance {iri} has no rdf:type, retrieved outgoing links and attributes: {props}.") + cls_subclasses = set(cls.construct_subclass_dictionary().keys()) + cls_subclasses.add(cls.rdf_type) + intersection = target_clz_rdf_types & cls_subclasses + if intersection: + if len(intersection) == 1: + target_clz_rdf_type = next(iter(intersection)) + else: + # NOTE instead of using the first element of the intersection + # we find the deepest subclass as target_clz_rdf_type + # so that the created object could inherite all the properties of its parent classes + # which prevents the loss of information + parent_classes = set() + for c in intersection: + if c in parent_classes: + # skip if it's already a parent class + continue + for other in intersection: + if other != c and issubclass(cls.retrieve_subclass(c), cls.retrieve_subclass(other)): + parent_classes.add(other) + deepest_subclasses = intersection - parent_classes + if len(deepest_subclasses) > 1: + # TODO [future] add support for allowing users to specify the target class + KnowledgeGraph._remove_iri_from_loading(iri) + raise ValueError( + f"""The instance {iri} is of type {target_clz_rdf_types}. + Amongst the pulling class {cls.__name__} ({cls.rdf_type}) + and its subclasses ({cls.construct_subclass_dictionary()}), + there exist classes that are not in the same branch of the inheritance tree, + including {deepest_subclasses}, + therefore it cannot be instantiated by pulling with class {cls.__name__}. 
+ Please consider pulling the instance directly with one of the class in {deepest_subclasses} + Alternatively, please check the inheritance tree is correctly defined in Python.""") + else: + target_clz_rdf_type = next(iter(deepest_subclasses)) + else: # if there's any error, remove the iri from the loading status # otherwise it will block any further pulling of the same object KnowledgeGraph._remove_iri_from_loading(iri) raise ValueError( - f"""The instance {iri} is of type {props[RDF.type.toPython()]}, + f"""The instance {iri} is of type {target_clz_rdf_types}, it doesn't match the rdf:type of class {cls.__name__} ({cls.rdf_type}), nor any of its subclasses ({cls.construct_subclass_dictionary()}), therefore it cannot be instantiated.""") @@ -930,15 +1018,11 @@ def pull_from_kg( # handle rdfs:label and rdfs:comment (also fetch of the remote KG) rdfs_properties_dict = {} if RDFS.label.toPython() in props: - if len(props[RDFS.label.toPython()]) > 1: - raise ValueError(f"The instance {iri} has multiple rdfs:label {props[RDFS.label.toPython()]}.") - rdfs_properties_dict['rdfs_label'] = list(props[RDFS.label.toPython()])[0] + rdfs_properties_dict['rdfs_label'] = set(list(props[RDFS.label.toPython()])) if RDFS.comment.toPython() in props: - if len(props[RDFS.comment.toPython()]) > 1: - raise ValueError(f"The instance {iri} has multiple rdfs:comment {props[RDFS.comment.toPython()]}.") - rdfs_properties_dict['rdfs_comment'] = list(props[RDFS.comment.toPython()])[0] + rdfs_properties_dict['rdfs_comment'] = set(list(props[RDFS.comment.toPython()])) # instantiate the object - if inst is not None: + if inst is not None and type(inst) is target_clz: for op_iri, op_dct in ops.items(): if flag_pull: # below lines pull those object properties that are NOT connected in the remote KG, @@ -1073,6 +1157,13 @@ def _export_to_owl(cls, g: Graph) -> Graph: cls_iri = cls.rdf_type g.add((URIRef(cls_iri), RDF.type, OWL.Class)) g.add((URIRef(cls_iri), RDFS.isDefinedBy, URIRef(cls.rdfs_isDefinedBy.namespace_iri))) + # add rdfs_comment_clz and rdfs_label_clz for class + if bool(cls.rdfs_comment_clz): + for comment in cls.rdfs_comment_clz: + g.add((URIRef(cls_iri), RDFS.comment, Literal(comment))) + if bool(cls.rdfs_label_clz): + for label in cls.rdfs_label_clz: + g.add((URIRef(cls_iri), RDFS.label, Literal(label))) # add super classes idx = cls.__mro__.index(BaseClass) for i in range(1, idx): @@ -1219,9 +1310,9 @@ def _update_according_to_fetch(self, fetched: dict, flag_connect_object: bool, f # compare rdfs_comment and rdfs_label for r in ['rdfs_comment', 'rdfs_label']: - fetched_value = fetched.get(r, None) - cached_value = self._latest_cache.get(r, None) - local_value = getattr(self, r) + fetched_value = fetched.get(r, set()) + cached_value = self._latest_cache.get(r, set()) + local_value = getattr(self, r) if getattr(self, r) is not None else set() # apply the same logic as above if fetched_value != cached_value: if local_value == cached_value: @@ -1330,6 +1421,8 @@ def _collect_diff_to_graph(self, g_to_remove: Graph, g_to_add: Graph, recursive_ recursive_depth = max(recursive_depth - 1, 0) if recursive_depth > -1 else max(recursive_depth - 1, -1) p_cache = self._latest_cache.get(f, set()) + if p_cache is None: + p_cache = set() # allows set operations p_now = getattr(self, f) if p_now is None: p_now = set() # allows set operations @@ -1357,18 +1450,20 @@ def _collect_diff_to_graph(self, g_to_remove: Graph, g_to_add: Graph, recursive_ g_to_remove, g_to_add = d_py._collect_diff_to_graph(g_to_remove, g_to_add, 
recursive_depth, traversed_iris) elif f == 'rdfs_comment': - if self._latest_cache.get(f) != self.rdfs_comment: - if self._latest_cache.get(f) is not None: - g_to_remove.add((URIRef(self.instance_iri), RDFS.comment, Literal(self._latest_cache.get(f)))) - if self.rdfs_comment is not None: - g_to_add.add((URIRef(self.instance_iri), RDFS.comment, Literal(self.rdfs_comment))) + rdfs_comment_cache = self._latest_cache.get(f, set()) + rdfs_comment_now = self.rdfs_comment if self.rdfs_comment is not None else set() + for comment in rdfs_comment_cache - rdfs_comment_now: + g_to_remove.add((URIRef(self.instance_iri), RDFS.comment, Literal(comment))) + for comment in rdfs_comment_now - rdfs_comment_cache: + g_to_add.add((URIRef(self.instance_iri), RDFS.comment, Literal(comment))) elif f == 'rdfs_label': - if self._latest_cache.get(f) != self.rdfs_label: - if self._latest_cache.get(f) is not None: - g_to_remove.add((URIRef(self.instance_iri), RDFS.label, Literal(self._latest_cache.get(f)))) - if self.rdfs_label is not None: - g_to_add.add((URIRef(self.instance_iri), RDFS.label, Literal(self.rdfs_label))) + rdfs_label_cache = self._latest_cache.get(f, set()) + rdfs_label_now = self.rdfs_label if self.rdfs_label is not None else set() + for label in rdfs_label_cache - rdfs_label_now: + g_to_remove.add((URIRef(self.instance_iri), RDFS.label, Literal(label))) + for label in rdfs_label_now - rdfs_label_cache: + g_to_add.add((URIRef(self.instance_iri), RDFS.label, Literal(label))) if not self._exist_in_kg: g_to_add.add((URIRef(self.instance_iri), RDF.type, URIRef(self.rdf_type))) @@ -1403,10 +1498,12 @@ def graph(self, g: Graph = None) -> Graph: prop = getattr(self, f) if getattr(self, f) is not None else set() for o in prop: g.add((URIRef(self.instance_iri), URIRef(tp.predicate_iri), Literal(o))) - elif f == 'rdfs_comment' and self.rdfs_comment is not None: - g.add((URIRef(self.instance_iri), RDFS.comment, Literal(self.rdfs_comment))) - elif f == 'rdfs_label' and self.rdfs_label is not None: - g.add((URIRef(self.instance_iri), RDFS.label, Literal(self.rdfs_label))) + elif f == 'rdfs_comment' and bool(self.rdfs_comment): + for comment in self.rdfs_comment: + g.add((URIRef(self.instance_iri), RDFS.comment, Literal(comment))) + elif f == 'rdfs_label' and bool(self.rdfs_label): + for label in self.rdfs_label: + g.add((URIRef(self.instance_iri), RDFS.label, Literal(label))) return g def triples(self) -> str: diff --git a/JPS_Ontology/ontology/ontomops/ontomops-ogm.ttl b/JPS_Ontology/ontology/ontomops/ontomops-ogm.ttl new file mode 100644 index 00000000000..4448210912b --- /dev/null +++ b/JPS_Ontology/ontology/ontomops/ontomops-ogm.ttl @@ -0,0 +1,329 @@ +@prefix dc: . +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:ObjectProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . 
+ + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:integer . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:integer . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:integer . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:double . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:double . + + a owl:DatatypeProperty ; + rdfs:domain [ a owl:Class ; + owl:unionOf ( ) ] ; + rdfs:isDefinedBy ; + rdfs:range xsd:double . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:double . + + a owl:ObjectProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range . + + a owl:DatatypeProperty ; + rdfs:domain ; + rdfs:isDefinedBy ; + rdfs:range xsd:string . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . 
+ + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf . + + a owl:Class ; + rdfs:isDefinedBy . + + a owl:Class ; + rdfs:isDefinedBy ; + rdfs:subClassOf , + . + + a owl:Ontology ; + dc:date "2024-12-07T19:03:28.517090" ; + rdfs:comment "An ontology developed for representing Metal-Organic Polyhedra (MOPs). This is object graph mapper (OGM) version." ; + owl:versionInfo "1.1-ogm" . + diff --git a/MOPTools/MOP_Discovery/assembler/assembler.py b/MOPTools/MOP_Discovery/assembler/assembler.py index 07a76cf53f5..1b736a387c9 100644 --- a/MOPTools/MOP_Discovery/assembler/assembler.py +++ b/MOPTools/MOP_Discovery/assembler/assembler.py @@ -165,8 +165,8 @@ def mop_string_writer(mop_building_1, mop_building_2): def mop_data_wrapper(mopFormula, altmopFormula, mop_symmetry, mop_weight,mop_charge): """First queries each MOP formula string, then wraps the provenanve, molecular weight and charge.""" checked = [] - result1 = querykg(SPARQL_ENDPOINTS['ontomops'], mop_reference(mopFormula, mop_symmetry)) - result2 = querykg(SPARQL_ENDPOINTS['ontomops'], mop_reference(altmopFormula, mop_symmetry)) + result1 = querykg(SPARQL_ENDPOINTS.ontomops, mop_reference(mopFormula, mop_symmetry)) + result2 = querykg(SPARQL_ENDPOINTS.ontomops, mop_reference(altmopFormula, mop_symmetry)) if result1: if 'MOPReference' in result1[0].keys(): provenance = result1[0]['MOPReference'] diff --git a/MOPTools/MOP_Discovery/kg_overview/analytics_operations.py b/MOPTools/MOP_Discovery/kg_overview/analytics_operations.py index 704618d42f5..f3de0a9e7ac 100644 --- a/MOPTools/MOP_Discovery/kg_overview/analytics_operations.py +++ b/MOPTools/MOP_Discovery/kg_overview/analytics_operations.py @@ -16,7 +16,7 @@ def mopsoverview(): """Collects all MOP IRIs found in the OntoMOP KG""" - result = querykg(SPARQL_ENDPOINTS['ontomops'], getMOPIRIs()) + result = querykg(SPARQL_ENDPOINTS.ontomops, getMOPIRIs()) refinedlist = [] # Each IRI is saved in the list of refined IRIs for item in result: refined = item['mopIRI'] @@ -42,7 +42,7 @@ def assemblyModelGroups(listofMOPs): # When quering SPARQL we obtain two lines of information deriving from each GBU/CBU # We do not know which CBU will be returned first, therefore this is being sorted. 
for mopIRI in listofMOPs: - MOPandGBUs = querykg(SPARQL_ENDPOINTS['ontomops'], mop_GBUs(mopIRI)) + MOPandGBUs = querykg(SPARQL_ENDPOINTS.ontomops, mop_GBUs(mopIRI)) i = 0 assemblyModel = {} for MOPandGBU in MOPandGBUs: @@ -138,7 +138,7 @@ def order(cbuA, cbuB): gbu['CBU1_Modularity'] = cbuA['Modularity'] gbu['CBU1_Planarity'] = cbuA['Planarity'] gbu['CBU1_Type'] = cbuA['CBUType'] - gbu['CBU1_SpeciesIRI'] = cbuA['speciesIRI'] + gbu['CBU1_SpeciesIRI'] = cbuA['cbuIRI'] gbu['CBU1_OuterCoordination'] = cbuA['OuterCoordination'] gbu['CBU1_FunctionalGroup'] = cbuA['CBUFunctionalGroup'] gbu['CBU1_Direction'] = cbuA['Direction'] @@ -147,7 +147,7 @@ def order(cbuA, cbuB): gbu['CBU2_Modularity'] = cbuB['Modularity'] gbu['CBU2_Planarity'] = cbuB['Planarity'] gbu['CBU2_Type'] = cbuB['CBUType'] - gbu['CBU2_SpeciesIRI'] = cbuB['speciesIRI'] + gbu['CBU2_SpeciesIRI'] = cbuB['cbuIRI'] gbu['CBU2_OuterCoordination'] = cbuB['OuterCoordination'] gbu['CBU2_FunctionalGroup'] = cbuB['CBUFunctionalGroup'] gbu['CBU2_Direction'] = cbuB['Direction'] @@ -157,7 +157,7 @@ def order(cbuA, cbuB): gbu['CBU1_Modularity'] = cbuB['Modularity'] gbu['CBU1_Planarity'] = cbuB['Planarity'] gbu['CBU1_Type'] = cbuB['CBUType'] - gbu['CBU1_SpeciesIRI'] = cbuB['speciesIRI'] + gbu['CBU1_SpeciesIRI'] = cbuB['cbuIRI'] gbu['CBU1_OuterCoordination'] = cbuB['OuterCoordination'] gbu['CBU1_FunctionalGroup'] = cbuB['CBUFunctionalGroup'] gbu['CBU1_Direction'] = cbuB['Direction'] @@ -166,7 +166,7 @@ def order(cbuA, cbuB): gbu['CBU2_Modularity'] = cbuA['Modularity'] gbu['CBU2_Planarity'] = cbuA['Planarity'] gbu['CBU2_Type'] = cbuA['CBUType'] - gbu['CBU2_SpeciesIRI'] = cbuA['speciesIRI'] + gbu['CBU2_SpeciesIRI'] = cbuA['cbuIRI'] gbu['CBU2_OuterCoordination'] = cbuA['OuterCoordination'] gbu['CBU2_FunctionalGroup'] = cbuA['CBUFunctionalGroup'] gbu['CBU2_Direction'] = cbuA['Direction'] diff --git a/MOPTools/MOP_Discovery/kg_overview/analytics_output.py b/MOPTools/MOP_Discovery/kg_overview/analytics_output.py index 57537f19a0f..e9c2790e2ec 100644 --- a/MOPTools/MOP_Discovery/kg_overview/analytics_output.py +++ b/MOPTools/MOP_Discovery/kg_overview/analytics_output.py @@ -28,30 +28,26 @@ def kgoverview_csv(uniques): def r1_json(list_R1): """Writes JSON file for List R1.""" list_R1_jsonpath = FILE_PATHS['list_R1'] - outpreR1 = json.dumps(list_R1, indent=4) - jsonoutput = open(list_R1_jsonpath, 'w') - jsonoutput.write(outpreR1) + with open(list_R1_jsonpath, 'w') as f: + json.dump(list_R1, f, indent=4) def preR2_json(list_preR2): """Writes JSON file for List preR2""" list_preR2_jsonpath = FILE_PATHS['list_preR2'] - outpreR2 = json.dumps(list_preR2, indent=4) - jsonoutput = open(list_preR2_jsonpath, 'w') - jsonoutput.write(outpreR2) + with open(list_preR2_jsonpath, 'w') as f: + json.dump(list_preR2, f, indent=4) def assemblyModel_json(assemblyModel, string): """Produces a starting and final json file with library MOPs and their respective properties.""" assemblyModel_jsonpath = FILE_PATHS['mops_am'] - outAssemblyModel = json.dumps(assemblyModel, indent=4) - jsonoutput = open(assemblyModel_jsonpath+string+'.json', 'w') - jsonoutput.write(outAssemblyModel) + with open(assemblyModel_jsonpath+string+'.json', 'w') as f: + json.dump(assemblyModel, f, indent=4) def assemblyModel_json_temp(assemblyModel, string, frequency): """For each assemly model produces a temporary file.""" assemblyModel_jsonpath = FILE_PATHS['temp'] - outAssemblyModel = json.dumps(assemblyModel, indent=4) - jsonoutput = open(assemblyModel_jsonpath+string+"__"+str(frequency)+'.json', 'w') - 
jsonoutput.write(outAssemblyModel) + with open(assemblyModel_jsonpath+string+"__"+str(frequency)+'.json', 'w') as f: + json.dump(assemblyModel, f, indent=4) def assemblyModel_json_update(string, frequency): """Merges the temporary json file to the original and final json.""" diff --git a/MOPTools/MOP_Discovery/main.py b/MOPTools/MOP_Discovery/main.py index fd451e0879d..3dba5d2b212 100644 --- a/MOPTools/MOP_Discovery/main.py +++ b/MOPTools/MOP_Discovery/main.py @@ -13,7 +13,7 @@ def start(): FOLDER_NAME = 'mops_output' mops_output() workflow() - shutil.rmtree(FOLDER_NAME+'\\temp') + shutil.rmtree(os.path.join(FOLDER_NAME, 'temp')) def mops_output(): FOLDER_NAME = 'mops_output' @@ -22,14 +22,14 @@ def mops_output(): " or delete it if you want to regenerate the output.\nOtherwise I am not doing anything. Exiting now.") return os.mkdir(FOLDER_NAME) - os.mkdir(FOLDER_NAME+'\\mops_am') - os.mkdir(FOLDER_NAME+'\\r1_cbus') - os.mkdir(FOLDER_NAME+'\\r2_cbus') - os.mkdir(FOLDER_NAME+'\\temp') - os.mkdir(FOLDER_NAME+'\\mops_r1') - os.mkdir(FOLDER_NAME+'\\mops_r2') + os.mkdir(os.path.join(FOLDER_NAME, 'mops_am')) + os.mkdir(os.path.join(FOLDER_NAME, 'r1_cbus')) + os.mkdir(os.path.join(FOLDER_NAME, 'r2_cbus')) + os.mkdir(os.path.join(FOLDER_NAME, 'temp')) + os.mkdir(os.path.join(FOLDER_NAME, 'mops_r1')) + os.mkdir(os.path.join(FOLDER_NAME, 'mops_r2')) if __name__ == '__main__': print ("Started at ", datetime.datetime.now()) start() - print ("Finished at ", datetime.datetime.now()) \ No newline at end of file + print ("Finished at ", datetime.datetime.now()) diff --git a/MOPTools/MOP_Discovery/manager/file_paths.py b/MOPTools/MOP_Discovery/manager/file_paths.py index 74690160a04..2e208a9b183 100644 --- a/MOPTools/MOP_Discovery/manager/file_paths.py +++ b/MOPTools/MOP_Discovery/manager/file_paths.py @@ -3,19 +3,21 @@ @author: Aleksandar Kondinski ''' - + +import os +base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) FILE_PATHS = { - 'r1_mops_csv': 'mops_output\\r1_mops.csv', - 'r2_mops_csv': 'mops_output\\r2_mops.csv', - 'r1andr2_csv': 'mops_output\\r1_r2.csv', - 'kg_assembly_csv': 'mops_output\\kg_analysis_1.csv', - 'mops_am': 'mops_output\\mops_am\\', - 'r1_cbus': 'mops_output\\r1_cbus\\', - 'r2_cbus': 'mops_output\\r2_cbus\\', - 'list_R1': 'mops_output\\list_R1.json', - 'list_preR2': 'mops_output\\list_preR2.json', - 'list_R2': 'mops_output\\list_R2.json', - 'temp': 'mops_output\\temp\\', - 'mops_r1': 'mops_output\\mops_r1\\', - 'mops_r2': 'mops_output\\mops_r2\\' -} \ No newline at end of file + 'r1_mops_csv': os.path.join(base, 'mops_output', 'r1_mops.csv'), + 'r2_mops_csv': os.path.join(base, 'mops_output', 'r2_mops.csv'), + 'r1andr2_csv': os.path.join(base, 'mops_output', 'r1_r2.csv'), + 'kg_assembly_csv': os.path.join(base, 'mops_output', 'kg_analysis_1.csv'), + 'mops_am': os.path.join(base, 'mops_output', 'mops_am', ''), + 'r1_cbus': os.path.join(base, 'mops_output', 'r1_cbus', ''), + 'r2_cbus': os.path.join(base, 'mops_output', 'r2_cbus', ''), + 'list_R1': os.path.join(base, 'mops_output', 'list_R1.json'), + 'list_preR2': os.path.join(base, 'mops_output', 'list_preR2.json'), + 'list_R2': os.path.join(base, 'mops_output', 'list_R2.json'), + 'temp': os.path.join(base, 'mops_output', 'temp', ''), + 'mops_r1': os.path.join(base, 'mops_output', 'mops_r1', ''), + 'mops_r2': os.path.join(base, 'mops_output', 'mops_r2', '') +} diff --git a/MOPTools/MOP_Discovery/query_operations/config.json b/MOPTools/MOP_Discovery/query_operations/config.json new file mode 100644 index 
00000000000..c1f9d487965 --- /dev/null +++ b/MOPTools/MOP_Discovery/query_operations/config.json @@ -0,0 +1,4 @@ +{ + "ontospecies": "http://localhost:48082/blazegraph/namespace/ontomops/sparql", + "ontomops": "http://localhost:48082/blazegraph/namespace/ontomops/sparql" +} \ No newline at end of file diff --git a/MOPTools/MOP_Discovery/query_operations/queryTemplates.py b/MOPTools/MOP_Discovery/query_operations/queryTemplates.py index d6740b8815e..959fb17ec7d 100644 --- a/MOPTools/MOP_Discovery/query_operations/queryTemplates.py +++ b/MOPTools/MOP_Discovery/query_operations/queryTemplates.py @@ -8,26 +8,26 @@ def getMOPIRIs(): """This function collects all MOPs from the KG.""" queryStr = """ - PREFIX OntoMOPs: + PREFIX OntoMOPs: PREFIX OntoSpecies: PREFIX Measure: PREFIX rdf: - SELECT ?mopIRI ?MOPFormula ?speciesIRI ?CBUFormula ?NumberValue ?Planarity ?Modularity ?Symbol - WHERE - { - ?mopIRI OntoMOPs:hasMOPFormula ?MOPFormula . - ?mopIRI OntoMOPs:hasAssemblyModel ?AssemblyModel . - ?AssemblyModel OntoMOPs:hasPolyhedralShape ?PolhedralShape . - ?PolhedralShape OntoMOPs:hasSymbol ?Symbol . - ?AssemblyModel OntoMOPs:hasGenericBuildingUnitNumber ?GBUNumber . - ?GBUNumber OntoMOPs:isNumberOf ?GBU . - ?GBU OntoMOPs:hasPlanarity ?Planarity . - ?GBU OntoMOPs:hasModularity ?Modularity . - ?GBUNumber OntoSpecies:value ?NumberValue . - ?mopIRI OntoMOPs:hasChemicalBuildingUnit ?CBU . - ?CBU OntoMOPs:isFunctioningAs ?GBU . - ?CBU OntoMOPs:hasCBUFormula ?CBUFormula . - ?CBU OntoSpecies:hasUniqueSpecies ?speciesIRI . + SELECT ?mopIRI ?MOPFormula ?CBUFormula ?NumberValue ?Planarity ?Modularity ?Symbol ?cbuIRI + WHERE { + ?mopIRI OntoMOPs:hasMOPFormula ?MOPFormula . + ?mopIRI OntoMOPs:hasProvenance ?Provenance . + ?mopIRI OntoMOPs:hasAssemblyModel ?AssemblyModel . + ?AssemblyModel OntoMOPs:hasPolyhedralShape ?PolhedralShape . + ?PolhedralShape OntoMOPs:hasSymbol ?Symbol . + ?AssemblyModel OntoMOPs:hasGenericBuildingUnitNumber ?GBUNumber . + ?GBUNumber OntoMOPs:isNumberOf ?GBU . + ?GBU OntoMOPs:hasGBUType ?GBUType . + ?GBUType OntoMOPs:hasPlanarity ?Planarity . + ?GBUType OntoMOPs:hasModularity ?Modularity . + ?GBUNumber OntoMOPs:hasUnitNumberValue ?NumberValue . + ?mopIRI OntoMOPs:hasChemicalBuildingUnit ?cbuIRI . + ?cbuIRI OntoMOPs:isFunctioningAs ?GBU . + ?cbuIRI OntoMOPs:hasCBUFormula ?CBUFormula . }""" return queryStr @@ -35,35 +35,34 @@ def mop_GBUs(mopIRI): """Queries and collects MOP data relating to the GBUs/CBUs. As every MOP has two GBUs, returns back information on both. """ queryStr = """ - PREFIX OntoMOPs: + PREFIX OntoMOPs: PREFIX OntoSpecies: PREFIX Measure: PREFIX rdf: PREFIX rdfs: - SELECT ?mopIRI ?MOPFormula ?CBUFormula ?NumberValue ?Planarity ?Modularity ?Symmetry ?MOPReference ?CBUType ?speciesIRI ?OuterCoordination ?CBUFunctionalGroup ?Direction - WHERE - { - ?mopIRI OntoMOPs:hasMOPFormula ?MOPFormula . - ?mopIRI OntoMOPs:hasProvenance ?Provenance . - ?Provenance OntoMOPs:hasReferenceDOI ?MOPReference . - ?mopIRI OntoMOPs:hasAssemblyModel ?AssemblyModel . - ?AssemblyModel OntoMOPs:hasSymmetryPointGroup ?Symmetry . - ?AssemblyModel OntoMOPs:hasGenericBuildingUnitNumber ?GBUNumber . - ?GBUNumber OntoMOPs:isNumberOf ?GBU . - ?GBU OntoMOPs:hasPlanarity ?Planarity . - ?GBU OntoMOPs:hasModularity ?Modularity . - ?GBUNumber OntoSpecies:value ?NumberValue . - ?mopIRI OntoMOPs:hasChemicalBuildingUnit ?CBU . - ?CBU OntoMOPs:isFunctioningAs ?GBU . - ?CBU OntoMOPs:hasCBUFormula ?CBUFormula . - ?CBU OntoMOPs:hasBindingSite ?CBUBindingSite . - ?CBU OntoMOPs:hasBindingDirection ?BindingDirection . 
- ?BindingDirection rdf:type ?Direction. - ?CBUBindingSite OntoMOPs:hasOuterCoordinationNumber ?OuterCoordination . - ?CBUBindingSite rdfs:label ?CBUFunctionalGroup. - ?CBUBindingSite rdf:type ?CBUType. - ?CBU OntoSpecies:hasUniqueSpecies ?speciesIRI . - FILTER ((?mopIRI) = <#MOPIRI>) . + SELECT DISTINCT ?mopIRI ?MOPFormula ?CBUFormula ?NumberValue ?Planarity ?Modularity ?Symmetry ?MOPReference ?CBUType ?OuterCoordination ?CBUFunctionalGroup ?Direction ?cbuIRI + WHERE { + values ?mopIRI {<#MOPIRI>} + ?mopIRI OntoMOPs:hasMOPFormula ?MOPFormula . + ?mopIRI OntoMOPs:hasProvenance ?Provenance . + ?Provenance OntoMOPs:hasReferenceDOI ?MOPReference . + ?mopIRI OntoMOPs:hasAssemblyModel ?AssemblyModel . + ?AssemblyModel OntoMOPs:hasSymmetryPointGroup ?Symmetry . + ?AssemblyModel OntoMOPs:hasGenericBuildingUnitNumber ?GBUNumber . + ?GBUNumber OntoMOPs:isNumberOf ?GBU . + ?GBU OntoMOPs:hasGBUType ?GBUType . + ?GBUType OntoMOPs:hasPlanarity ?Planarity . + ?GBUType OntoMOPs:hasModularity ?Modularity . + ?GBUNumber OntoMOPs:hasUnitNumberValue ?NumberValue . + ?mopIRI OntoMOPs:hasChemicalBuildingUnit ?cbuIRI . + ?cbuIRI OntoMOPs:isFunctioningAs ?GBU . + ?cbuIRI OntoMOPs:hasCBUFormula ?CBUFormula . + ?cbuIRI OntoMOPs:hasBindingSite ?CBUBindingSite . + ?cbuIRI OntoMOPs:hasBindingDirection ?BindingDirection . + ?BindingDirection rdf:type ?Direction. + ?CBUBindingSite OntoMOPs:hasOuterCoordinationNumber ?OuterCoordination . + ?CBUBindingSite OntoMOPs:hasBindingFragment ?CBUFunctionalGroup. + ?CBUBindingSite rdf:type ?CBUType. }""" queryStr = queryStr.replace('#MOPIRI', str(mopIRI)) return queryStr @@ -71,7 +70,7 @@ def mop_GBUs(mopIRI): def mop_reference(string, mop_symmetry): """Using a MOP formula and Symmetry point group, checks if the MOP exists in the KG.""" queryStr = """ - PREFIX OntoMOPs: + PREFIX OntoMOPs: PREFIX OntoSpecies: PREFIX Measure: PREFIX rdf: @@ -80,7 +79,7 @@ def mop_reference(string, mop_symmetry): { ?mopIRI OntoMOPs:hasMOPFormula ?MOPFormula . ?mopIRI OntoMOPs:hasProvenance ?Provenance . - ?Provenance OntoMOPs:hasReferenceDOI ?MOPReference . + ?Provenance OntoMOPs:hasReferenceDOI ?MOPReference . FILTER ((?MOPFormula) = "#MOPReference"). ?mopIRI OntoMOPs:hasAssemblyModel ?AssemblyModel . ?AssemblyModel OntoMOPs:hasSymmetryPointGroup ?Symmetry . @@ -92,7 +91,7 @@ def mop_reference(string, mop_symmetry): -def species_properties(speciesIRI): +def species_properties(cbuIRI): """Queries the molecular weight and mass of every CBU. """ queryStr = """ PREFIX OntoSpecies: @@ -102,13 +101,12 @@ def species_properties(speciesIRI): SELECT ?MolecularWeightValue ?MolecularChargeValue WHERE { - ?species OntoSpecies:hasMolecularWeight ?MolecularWeight . - ?MolecularWeight OntoSpecies:value ?MolecularWeightValue . - ?species OntoSpecies:hasCharge ?MolecularCharge . - ?MolecularCharge OntoSpecies:value ?MolecularChargeValue . - - FILTER ((?species) = <#SPECIESIRI>) . + values ?cbu {<#CBUIRI>} + ?cbu OntoSpecies:hasMolecularWeight ?MolecularWeight . + ?MolecularWeight Measure:hasValue/Measure:hasNumericalValue ?MolecularWeightValue . + ?cbu OntoSpecies:hasCharge ?MolecularCharge . + ?MolecularCharge Measure:hasValue/Measure:hasNumericalValue ?MolecularChargeValue . 
}""" - queryStr = queryStr.replace('#SPECIESIRI', str(speciesIRI)) + queryStr = queryStr.replace('#CBUIRI', str(cbuIRI)) return queryStr diff --git a/MOPTools/MOP_Discovery/query_operations/queryendpoint.py b/MOPTools/MOP_Discovery/query_operations/queryendpoint.py index 2c0cb42cfe3..0dd92b64314 100644 --- a/MOPTools/MOP_Discovery/query_operations/queryendpoint.py +++ b/MOPTools/MOP_Discovery/query_operations/queryendpoint.py @@ -1,4 +1,15 @@ -SPARQL_ENDPOINTS = { - 'ontospecies': 'http://theworldavatar.com/blazegraph/namespace/ontospecies/sparql', - 'ontomops': 'https://kg.cmclinnovations.com/blazegraph_geo/namespace/ontomops/sparql' -} \ No newline at end of file +import json +import os + + +config_fpath = os.path.join(os.path.dirname(__file__), 'config.json') + +class SPARQLEndpoints: + def __init__(self, ontospecies, ontomops): + self.ontospecies = ontospecies + self.ontomops = ontomops + +with open(config_fpath, 'r') as file: + config = json.load(file) + +SPARQL_ENDPOINTS = SPARQLEndpoints(config['ontospecies'], config['ontomops']) diff --git a/MOPTools/MOP_Discovery/set_operations/lib1_creation.py b/MOPTools/MOP_Discovery/set_operations/lib1_creation.py index 8593dcb4cfe..61e2fa39f61 100644 --- a/MOPTools/MOP_Discovery/set_operations/lib1_creation.py +++ b/MOPTools/MOP_Discovery/set_operations/lib1_creation.py @@ -82,7 +82,7 @@ def lib1_Creation(uniques): def query_speciesIRI(cbuiri): """For each CBU queries its OntoSpecies Information.""" - result = querykg(SPARQL_ENDPOINTS['ontospecies'], species_properties(cbuiri)) #The query gets a full list of IRIs + result = querykg(SPARQL_ENDPOINTS.ontospecies, species_properties(cbuiri)) #The query gets a full list of IRIs cbu_properties = [] # Each IRI is saved in the list of refined IRIs if result: if 'MolecularWeightValue' in result[0].keys(): diff --git a/MOPTools/twa_mops/.gitignore b/MOPTools/twa_mops/.gitignore new file mode 100644 index 00000000000..bc463f8daec --- /dev/null +++ b/MOPTools/twa_mops/.gitignore @@ -0,0 +1,6 @@ +*.txt +*.ttl +*.xyz +*.csv +*.json +*.owl diff --git a/MOPTools/twa_mops/alg1.py b/MOPTools/twa_mops/alg1.py new file mode 100644 index 00000000000..685a9d6e982 --- /dev/null +++ b/MOPTools/twa_mops/alg1.py @@ -0,0 +1,4 @@ +import os +alg1_fpath = os.path.join(os.path.dirname(__file__), "algs_sparql", "alg1.sparql") +with open(alg1_fpath, "r") as file: + alg1 = file.read() diff --git a/MOPTools/twa_mops/alg2.py b/MOPTools/twa_mops/alg2.py new file mode 100644 index 00000000000..7562787c440 --- /dev/null +++ b/MOPTools/twa_mops/alg2.py @@ -0,0 +1,7 @@ +# NOTE if we would like to allow CBUs functioning as 3-planar to function as 2-bent in new MOPs +# we can change "=" in `filter (?gbu_modularity >= ?metal_gbu_modularity)` and `filter (?_gbu_modularity >= ?organic_gbu_modularity)` +# to ">=" +import os +alg2_fpath = os.path.join(os.path.dirname(__file__), "algs_sparql", "alg2.sparql") +with open(alg2_fpath, "r") as file: + alg2 = file.read() diff --git a/MOPTools/twa_mops/algs_sparql/alg1.sparql b/MOPTools/twa_mops/algs_sparql/alg1.sparql new file mode 100644 index 00000000000..3a8570057fe --- /dev/null +++ b/MOPTools/twa_mops/algs_sparql/alg1.sparql @@ -0,0 +1,46 @@ +prefix os: +prefix om: +prefix mops: +prefix rdf: +prefix rdfs: +select distinct ?mop_formula ?mop_mw ?mop_charge ?am ?metal ?organic ?metal_gbu ?organic_gbu # ?metal_formula ?organic_formula ?am_label ?metal_gbu_label ?organic_gbu_label ?metal_gbu_number ?organic_gbu_number +where { + { + select distinct ?metal_formula ?metal_mw ?metal_charge 
?metal_gbu_label ?am_label ?metal_gbu_number ?am ?metal_gbu ?metal ?metal_binding ?metal_ocn + where { + ?metal a mops:ChemicalBuildingUnit; mops:hasBindingSite ?metal_site; mops:hasBindingDirection/rdf:type ?metal_binding. + ?metal_site rdf:type mops:MetalSite; mops:hasOuterCoordinationNumber ?metal_ocn. + ?metal mops:isFunctioningAs ?metal_gbu; mops:hasCBUFormula ?metal_formula. + ?metal ^mops:hasChemicalBuildingUnit ?existing_mop. + ?existing_mop mops:hasAssemblyModel ?am; mops:hasMOPFormula ?existing_mop_formula. + ?am a mops:AssemblyModel; rdfs:label ?am_label; mops:hasSymmetryPointGroup ?am_symmetry. + ?metal_gbu ^mops:hasGenericBuildingUnit ?am; mops:hasGBUType/rdfs:label ?metal_gbu_label. + ?metal_gbu_n mops:isNumberOf ?metal_gbu; mops:hasUnitNumberValue ?metal_gbu_number. + ?metal os:hasMolecularWeight/om:hasValue/om:hasNumericalValue ?metal_mw; os:hasCharge/om:hasValue/om:hasNumericalValue ?metal_charge. + } + } + { + select distinct ?organic_formula ?organic_mw ?organic_charge ?organic_gbu_label ?organic_gbu_number ?am ?organic_gbu ?organic ?organic_binding ?organic_ocn + where { + ?organic a mops:ChemicalBuildingUnit; mops:hasBindingSite ?organic_site; mops:hasBindingDirection/rdf:type ?organic_binding. + ?organic_site rdf:type mops:OrganicSite; mops:hasOuterCoordinationNumber ?organic_ocn. + ?organic mops:isFunctioningAs ?organic_gbu; mops:hasCBUFormula ?organic_formula. + ?organic ^mops:hasChemicalBuildingUnit ?_existing_mop. + ?_existing_mop mops:hasAssemblyModel ?am; mops:hasMOPFormula ?_existing_mop_formula. + ?am a mops:AssemblyModel; rdfs:label ?am_label; mops:hasSymmetryPointGroup ?am_symmetry. + ?organic_gbu ^mops:hasGenericBuildingUnit ?am; mops:hasGBUType/rdfs:label ?organic_gbu_label. + ?organic_gbu_n mops:isNumberOf ?organic_gbu; mops:hasUnitNumberValue ?organic_gbu_number. + ?organic os:hasMolecularWeight/om:hasValue/om:hasNumericalValue ?organic_mw; os:hasCharge/om:hasValue/om:hasNumericalValue ?organic_charge. + } + } + filter (?metal_gbu != ?organic_gbu) + filter (?metal_ocn = ?organic_ocn) + filter (?metal_binding = ?organic_binding) + bind (?organic_mw*?organic_gbu_number+?metal_mw*?metal_gbu_number as ?mop_mw) + bind (?organic_charge*?organic_gbu_number+?metal_charge*?metal_gbu_number as ?mop_charge) + bind (concat(?metal_formula, str(?metal_gbu_number), ?organic_formula, str(?organic_gbu_number)) as ?mop_formula) + filter not exists { + ?_mop mops:hasAssemblyModel ?am; mops:hasChemicalBuildingUnit ?metal, ?organic. + } +} +order by ?organic_mw diff --git a/MOPTools/twa_mops/algs_sparql/alg2.sparql b/MOPTools/twa_mops/algs_sparql/alg2.sparql new file mode 100644 index 00000000000..6cfd4418444 --- /dev/null +++ b/MOPTools/twa_mops/algs_sparql/alg2.sparql @@ -0,0 +1,52 @@ +prefix os: +prefix om: +prefix mops: +prefix rdf: +prefix rdfs: +select distinct ?mop_formula ?mop_mw ?mop_charge ?am ?metal ?organic ?metal_gbu ?organic_gbu ?metal_formula ?organic_formula ?am_label ?am_symmetry ?metal_gbu_label ?organic_gbu_label ?metal_gbu_number ?organic_gbu_number +where { + { + select distinct ?metal_formula ?metal_mw ?metal_charge ?metal_gbu_label ?am_label ?am_symmetry ?metal_gbu_number ?am ?metal_gbu ?metal ?metal_binding ?metal_ocn + where { + ?metal a mops:ChemicalBuildingUnit; mops:hasBindingSite ?metal_site; mops:hasBindingDirection/rdf:type ?metal_binding. + ?metal_site rdf:type mops:MetalSite; mops:hasOuterCoordinationNumber ?metal_ocn. + ?metal mops:isFunctioningAs ?_metal_gbu; mops:hasCBUFormula ?metal_formula. 
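+      # the filter below keeps only those GBU roles (?_metal_gbu) that appear in the assembly model of a MOP already containing this CBU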
+ filter exists {?metal ^mops:hasChemicalBuildingUnit/mops:hasAssemblyModel/mops:hasGenericBuildingUnit ?_metal_gbu.} + ?_metal_gbu mops:hasGBUType/mops:hasModularity ?_metal_gbu_modularity. + ?_metal_gbu ^mops:isFunctioningAs/mops:isFunctioningAs ?metal_gbu. + ?metal_gbu mops:hasGBUType/mops:hasModularity ?metal_gbu_modularity. + ?metal_gbu ^mops:hasGenericBuildingUnit ?am; mops:hasGBUType/rdfs:label ?metal_gbu_label. + ?am a mops:AssemblyModel; rdfs:label ?am_label; mops:hasSymmetryPointGroup ?am_symmetry. + ?metal_gbu_n mops:isNumberOf ?metal_gbu; mops:hasUnitNumberValue ?metal_gbu_number. + ?metal os:hasMolecularWeight/om:hasValue/om:hasNumericalValue ?metal_mw; os:hasCharge/om:hasValue/om:hasNumericalValue ?metal_charge. + filter (?_metal_gbu_modularity = ?metal_gbu_modularity) + } + } + { + select distinct ?organic_formula ?organic_mw ?organic_charge ?organic_gbu_label ?organic_gbu_number ?am ?organic_gbu ?organic ?organic_binding ?organic_ocn #?equiv_cbu + where { + ?organic a mops:ChemicalBuildingUnit; mops:hasBindingSite ?organic_site; mops:hasBindingDirection/rdf:type ?organic_binding. + ?organic_site rdf:type mops:OrganicSite; mops:hasOuterCoordinationNumber ?organic_ocn. + ?organic mops:isFunctioningAs ?_organic_gbu; mops:hasCBUFormula ?organic_formula. + filter exists {?organic ^mops:hasChemicalBuildingUnit/mops:hasAssemblyModel/mops:hasGenericBuildingUnit ?_organic_gbu.} + ?_organic_gbu mops:hasGBUType/mops:hasModularity ?_organic_gbu_modularity. + ?_organic_gbu ^mops:isFunctioningAs ?equiv_cbu. ?equiv_cbu mops:isFunctioningAs ?organic_gbu. + ?organic_gbu mops:hasGBUType/mops:hasModularity ?organic_gbu_modularity. + ?organic_gbu ^mops:hasGenericBuildingUnit ?am; mops:hasGBUType/rdfs:label ?organic_gbu_label. + ?am a mops:AssemblyModel; rdfs:label ?am_label; mops:hasSymmetryPointGroup ?am_symmetry. + ?organic_gbu_n mops:isNumberOf ?organic_gbu; mops:hasUnitNumberValue ?organic_gbu_number. + ?organic os:hasMolecularWeight/om:hasValue/om:hasNumericalValue ?organic_mw; os:hasCharge/om:hasValue/om:hasNumericalValue ?organic_charge. + filter (?_organic_gbu_modularity = ?organic_gbu_modularity) + } + } + filter (?metal_gbu != ?organic_gbu) + filter (?metal_ocn = ?organic_ocn) + filter (?metal_binding = ?organic_binding) + bind (?organic_mw*?organic_gbu_number+?metal_mw*?metal_gbu_number as ?mop_mw) + bind (?organic_charge*?organic_gbu_number+?metal_charge*?metal_gbu_number as ?mop_charge) + bind (concat(?metal_formula, str(?metal_gbu_number), ?organic_formula, str(?organic_gbu_number)) as ?mop_formula) + filter not exists { + ?_mop mops:hasAssemblyModel ?am; mops:hasChemicalBuildingUnit ?metal, ?organic. 
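+    # i.e. skip candidate combinations for which a MOP with this assembly model and both of these CBUs already exists in the KG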
+ } +} +order by ?am_label diff --git a/MOPTools/twa_mops/assembly.ipynb b/MOPTools/twa_mops/assembly.ipynb new file mode 100644 index 00000000000..7c5dfed7317 --- /dev/null +++ b/MOPTools/twa_mops/assembly.ipynb @@ -0,0 +1,2275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Info: Initializing JPSGateway with resName=JpsBaseLib, jarPath=None\n" + ] + } + ], + "source": [ + "from geo import *\n", + "from ontomops import *\n", + "from instantiation import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "def create_xyz_file(df: pd.DataFrame, filename):\n", + " num_atoms = len(df)\n", + "\n", + " with open(filename, 'w') as f:\n", + " f.write(f\"{num_atoms}\\n\")\n", + " f.write(\"XYZ file generated from DataFrame\\n\")\n", + " for _, row in df.iterrows():\n", + " f.write(f\"{row['Atom']} {row['X']} {row['Y']} {row['Z']}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "def load_am_cbu_json(am_json_file, cbu1_file, cbu2_file):\n", + " with open(am_json_file, \"r\") as file:\n", + " am_json = json.load(file)\n", + " with open(cbu1_file, \"r\") as file:\n", + " cbu1_json = json.load(file)\n", + " with open(cbu2_file, \"r\") as file:\n", + " cbu2_json = json.load(file)\n", + " return am_json, cbu1_json, cbu2_json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plotly.com" + }, + "data": [ + { + "hovertemplate": "Atom=C
+        [plotly figure JSON elided: interactive 3D scatter of the assembled MOP atoms (C, O, Pd, H, S) on X/Y/Z axes, titled "MOP: CBU16CBU212 / AM: (4-planar)x6(2-bent)x12_Oh"]
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "half_bond_length = 1.4 / 2\n", + "am_json_file = './data/am/assembly_models.json'\n", + "\n", + "# NOTE test case 1\n", + "cbu1_file = './data/cbu/CBU1.json'\n", + "cbu2_file = './data/cbu/CBU2.json'\n", + "am_label = '(4-planar)x6(2-bent)x12_Oh'\n", + "gbu_type_1 = GenericBuildingUnitType(hasModularity=4, hasPlanarity='planar')\n", + "gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='bent')\n", + "gbu_number_1 = 6\n", + "gbu_number_2 = 12\n", + "am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 2\n", + "# cbu1_file = './data/cbu/CBU1.json'\n", + "# cbu2_file = './data/cbu/CBU2.json'\n", + "# am_label = '(4-planar)x12(2-bent)x24_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=4, hasPlanarity='planar')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='bent')\n", + "# gbu_number_1 = 12\n", + "# gbu_number_2 = 24\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 3\n", + "# cbu1_file = './data/cbu/[V5O9].json'\n", + "# cbu2_file = './data/cbu/A1.json'\n", + "# am_label = '(4-pyramidal)x6(2-linear)x12_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=4, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='linear')\n", + "# gbu_number_1, gbu_number_2 = 6, 12\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 4\n", + "# cbu1_file = './data/cbu/[WV5O11].json'\n", + "# cbu2_file = './data/cbu/A1.json'\n", + "# am_label = '(5-pyramidal)x12(2-linear)x30_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=5, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='linear')\n", + "# gbu_number_1, gbu_number_2 = 12, 30\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 4\n", + "# cbu1_file = './data/cbu/[WV5O11].json'\n", + "# cbu2_file = './data/cbu/[(C10H6)(CO2)].json'\n", + "# am_label = '(5-pyramidal)x12(2-linear)x30_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=5, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='linear')\n", + "# gbu_number_1, gbu_number_2 = 12, 30\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 5\n", + "# cbu1_file = './data/cbu/[WV5O11].json'\n", + "# cbu2_file = './data/cbu/[(C6H3)(C2C6H4)3(CO2)3].json'\n", + "# am_label = '(5-pyramidal)x12(3-planar)x20_Ih'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=5, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='planar')\n", + "# gbu_number_1, gbu_number_2 = 12, 20\n", + "# am_symmetry = 'Ih'\n", + "\n", + "# # NOTE test case 6\n", + "# cbu1_file = './data/cbu/[V5O9].json'\n", + "# cbu2_file = './data/cbu/[(C6H3)(C2C6H4)3(CO2)3].json'\n", + "# am_label = '(4-pyramidal)x6(3-planar)x8_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=4, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='planar')\n", + "# gbu_number_1, gbu_number_2 = 6, 8\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 7\n", + "# cbu1_file = './data/cbu/[(C6H3)(C2C6H4)3(CO2)3].json'\n", + "# cbu2_file = './data/cbu/CBU2.json'\n", + "# am_label = '(3-planar)x8(2-bent)x12_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='planar')\n", + "# gbu_type_2 = 
GenericBuildingUnitType(hasModularity=2, hasPlanarity='bent')\n", + "# gbu_number_1, gbu_number_2 = 8, 12\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 8\n", + "# cbu1_file = './data/cbu/CBU1.json'\n", + "# cbu2_file = './data/cbu/[(C6H3)(OC6H4)3(CO2)3].json'\n", + "# am_label = '(4-planar)x6(3-pyramidal)x8_Oh'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=4, hasPlanarity='planar')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='pyramidal')\n", + "# gbu_number_1, gbu_number_2 = 6, 8\n", + "# am_symmetry = 'Oh'\n", + "\n", + "# # NOTE test case 9\n", + "# cbu1_file = './data/cbu/[(C6H3)(C2C6H4)3(CO2)3].json'\n", + "# cbu2_file = './data/cbu/[(C6H3)(OC6H4)3(CO2)3].json'\n", + "# am_label = '(3-planar)x4(3-pyramidal)x4_Td'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='planar')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='pyramidal')\n", + "# gbu_number_1, gbu_number_2 = 4, 4\n", + "# am_symmetry = 'Td'\n", + "\n", + "# # NOTE test case 10\n", + "# cbu1_file = './data/cbu/[(C6H3)(OC6H4)3(CO2)3].json'\n", + "# cbu2_file = './data/cbu/[(C10H6)(CO2)].json'\n", + "# am_label = '(3-pyramidal)x4(2-linear)x6_Td'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='linear')\n", + "# gbu_number_1, gbu_number_2 = 4, 6\n", + "# am_symmetry = 'Td'\n", + "\n", + "# # NOTE test case 11\n", + "# cbu1_file = './data/cbu/[(C6H3)(C2C6H4)3(CO2)3].json'\n", + "# cbu2_file = './data/cbu/CBU2.json'\n", + "# am_label = '(3-planar)x4(2-bent)x6_Td'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='planar')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='bent')\n", + "# gbu_number_1, gbu_number_2 = 4, 6\n", + "# am_symmetry = 'Td'\n", + "\n", + "# # NOTE test case 12\n", + "# cbu1_file = './data/cbu/[(C6H3)(OC6H4)3(CO2)3].json'\n", + "# cbu2_file = './data/cbu/CBU2.json'\n", + "# am_label = '(3-pyramidal)x2(2-bent)x3_D3h'\n", + "# gbu_type_1 = GenericBuildingUnitType(hasModularity=3, hasPlanarity='pyramidal')\n", + "# gbu_type_2 = GenericBuildingUnitType(hasModularity=2, hasPlanarity='bent')\n", + "# gbu_number_1, gbu_number_2 = 2, 3\n", + "# am_symmetry = 'D3h'\n", + "\n", + "am_json, cbu1_json, cbu2_json = load_am_cbu_json(am_json_file, cbu1_file, cbu2_file)\n", + "mop, cbu1, cbu2, gbu1, gbu2 = instantiate_mop(am_json[am_label], cbu1_json, cbu2_json, gbu_type_1, gbu_type_2, gbu_number_1, gbu_number_2, am_symmetry)\n", + "\n", + "cbu_translated = mop.assemble(half_bond_length = half_bond_length)\n", + "\n", + "rows = []\n", + "for cbu, gcc_points in cbu_translated.items():\n", + " for gcc in gcc_points:\n", + " for point in gcc_points[gcc]:\n", + " rows.append([cbu, gcc, point.x, point.y, point.z, point.label])\n", + "\n", + "df = pd.DataFrame(rows, columns=['CBU', 'GBU_CC', 'X', 'Y', 'Z', 'Atom'])\n", + "\n", + "import plotly.express as px\n", + "mop_name = f\"{cbu1_file.split('/')[-1].split('.json')[0]}{gbu_number_1}{cbu2_file.split('/')[-1].split('.json')[0]}{gbu_number_2}\"\n", + "fig = px.scatter_3d(df, x='X', y='Y', z='Z', color='Atom', title=f'MOP: {mop_name}\\n AM: {am_label}')\n", + "fig.update_traces(marker=dict(size=3))\n", + "fig.update_layout(autosize=False, width=1200, height=400)\n", + "fig.write_html(f'./data/plotly/{am_label}___{mop_name}.html')\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "twa", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/MOPTools/twa_mops/cavity.py b/MOPTools/twa_mops/cavity.py new file mode 100644 index 00000000000..20ccffdf7ed --- /dev/null +++ b/MOPTools/twa_mops/cavity.py @@ -0,0 +1,83 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +import argparse +import time +import sys +import os + +HELP_FILE = Path.cwd() / ".first_run_done" + +def display_help(): + print( + f""" + NOTE: Please run this script in the same directory as the molovol executable. + + Args: + -r: Small probe radius (default is 1.2) + -g: Grid resolution (default is 0.5) + -r2: Large probe radius (default is 3) + -fs: XYZ structure file or folder (default is ./inputfile) + -do: Directory for outputs (default is ./outputs_, will be created if it doesn't exist) + -n: Number of concurrent processes (default is 2) + + Press Enter to continue, or any other key to exit... + """ + ) + +def get_xyz_files(path): + xyz_files = [] + path = Path(path) + if path.is_file(): + if path.suffix == '.xyz': + xyz_files.append(path) + elif path.is_dir(): + for file in path.rglob('*.xyz'): + xyz_files.append(file) + return xyz_files + +def run_subprocess(args): + os.system(f"molovol -r {args.r} -g {args.grid} -r2 {args.r2} -fs {args.fs} -xr -do {args.do} -q -o none") + # -o inputfile,radius_small,radius_large,resolution,formula,vol_core_s,vol_shell_s + +def main(): + if not HELP_FILE.exists(): + display_help() + user_input = input() + if user_input != "": + print("Exiting the script...") + sys.exit(0) + else: + print("Continuing with the script...") + HELP_FILE.touch() + + parser = argparse.ArgumentParser(description="Parallel Execution Script") + parser.add_argument('-r', type=float, default=1.2, help="Small probe radius") + parser.add_argument('-g', '--grid', type=float, default=0.5, help="Grid resolution") + parser.add_argument('-r2', type=float, default=3, help="Large probe radius") + parser.add_argument('-fs', type=str, default='./inputfile', help="XYZ structure file or folder") + parser.add_argument('-do', type=str, default=f'outputs_{int(time.time())}', help="Directory for outputs") + parser.add_argument('-n', '--num-processes', type=int, default=2, help="Number of concurrent processes (default is 2)") + args = parser.parse_args() + + if not os.path.exists(args.do): + os.makedirs(args.do) + else: + if not os.path.isdir(args.do): + raise Exception(f'Please provide a valid directory for output files, received: {args.do}') + xyzs = get_xyz_files(args.fs) + + tasks = [] + for xyz in xyzs: + task = argparse.Namespace(r=args.r, grid=args.grid, r2=args.r2, fs=xyz, do=args.do) + tasks.append(task) + + with ThreadPoolExecutor(max_workers=args.num_processes) as executor: + futures = [executor.submit(run_subprocess, task) for task in tasks] + + # Wait for all processes to complete + for future in as_completed(futures): + output = future.result() + print(output) + +if __name__ == '__main__': + main() diff --git a/MOPTools/twa_mops/cavity_and_pore_size.py b/MOPTools/twa_mops/cavity_and_pore_size.py new file mode 100644 
index 00000000000..896e549ec65 --- /dev/null +++ b/MOPTools/twa_mops/cavity_and_pore_size.py @@ -0,0 +1,78 @@ +# NOTE script adapted from the original version provided by Dr. Aleksandar Kondinski (ak2332@cam.ac.uk) + +from geo import * +import os +import csv +import math +from rdkit.Chem import GetPeriodicTable +from rdkit.Chem.rdmolfiles import MolFromXYZFile + + +# Define the directory paths and file names +xyz_folder = "./data/xyz_mops_new" +output_csv = "./data/xyz_mops_new/ogm_inner_diameter_new.csv" + +PERIODIC_TABLE = GetPeriodicTable() + +def outer_diameter(atoms: List[Point]): + positions = np.array([atom.as_array for atom in atoms]) + distances = np.linalg.norm(positions, axis=1) + return distances.max() * 2 + + +def largest_inner_sphere_diameter(atoms: List[Point]): + # find the centroid of the atoms + centroid = Point.centroid(atoms) + + # find the atom that is closest to the centroid once its covalent radius is subtracted + sorted_by_adjusted_distance = sorted(atoms, key = lambda x: centroid.get_distance_to(x) - PERIODIC_TABLE.GetRcovalent(x.label)) + + # calculate the inner diameter as the adjusted distance, as well as the inner volume of the sphere + inner_radius = sorted_by_adjusted_distance[0].get_distance_to(centroid) - PERIODIC_TABLE.GetRcovalent(sorted_by_adjusted_distance[0].label) + inner_radius = max(0, inner_radius) # set it to 0 if it's negative (meaning the centroid is within distance of the atom's covalent radius) + inner_diameter = inner_radius * 2 + inner_diameter_atom = sorted_by_adjusted_distance[0].label + inner_volume = (4 / 3) * math.pi * inner_radius ** 3 + return inner_diameter_atom, inner_diameter, inner_volume + + +def pore_size_diameter(atoms: List[Point], probing_vector: Vector): + # keep only the atoms that lie in the positive direction of the probing vector + positive_atoms = [a for a in atoms if a.as_array.dot(probing_vector.as_array) > 0] + if not positive_atoms: + return None + # calculate the perpendicular distances of the atoms to the probing direction and sort them by the adjusted distance (subtracting covalent radius) + sorted_by_adjusted_distance = sorted(positive_atoms, key = lambda x: x.get_distance_to_vector(probing_vector) - PERIODIC_TABLE.GetRcovalent(x.label)) + pore_size = sorted_by_adjusted_distance[0].get_distance_to_vector(probing_vector) - PERIODIC_TABLE.GetRcovalent(sorted_by_adjusted_distance[0].label) + pore_size_diameter = max(0, pore_size) * 2 # set it to 0 if it's negative (meaning the vector is within distance of the atom's covalent radius) + return pore_size_diameter + + +if __name__ == '__main__': + with open(output_csv, mode='w', newline='') as csv_file: + csv_writer = csv.writer(csv_file) + csv_writer.writerow(["File Name", "Atom", "Inner Diameter", "Volume"]) + + failed = 0 + # Loop through each XYZ file in the folder + for file_name in os.listdir(xyz_folder): + if file_name.endswith(".xyz"): + file_path = os.path.join(xyz_folder, file_name) + + # Read the XYZ file to find the closest atom + mol = MolFromXYZFile(file_path) + + if mol is None: + failed += 1 + csv_writer.writerow([file_name, 'failed', 'failed', 'failed']) + else: + atoms = [] + for a in mol.GetAtoms(): + pos = mol.GetConformer().GetAtomPosition(a.GetIdx()) + pt = Point(x=pos.x, y=pos.y, z=pos.z, label=a.GetSymbol()) + atoms.append(pt) + + inner_diameter_atom, inner_diameter, inner_volume = largest_inner_sphere_diameter(atoms) + + csv_writer.writerow([file_name, inner_diameter_atom, inner_diameter, inner_volume]) + print(f"Processed {file_name}: Closest atom = 
{inner_diameter_atom}, Inner diameter = {inner_diameter:.3f} Å, Volume = {inner_volume:.3f} ų") diff --git a/MOPTools/twa_mops/geo.py b/MOPTools/twa_mops/geo.py new file mode 100644 index 00000000000..6a7d3e10d42 --- /dev/null +++ b/MOPTools/twa_mops/geo.py @@ -0,0 +1,466 @@ +# This file contains utility classes for geometric calculations +from typing import List, Tuple, Optional +from itertools import combinations, product +import numpy as np +from scipy.spatial.transform import Rotation as R +from scipy.spatial import Delaunay +from pydantic import BaseModel + + +class RotationMatrix(): + matrix: np.ndarray + + def __init__(self, matrix: np.ndarray): + self.matrix = matrix + + def apply(self, vector: np.ndarray): + return self.matrix @ vector + + def combine(self, other: 'RotationMatrix'): + return RotationMatrix(matrix=self.matrix @ other.matrix) + + def as_matrix(self): + return self.matrix + + def as_quaternion(self): + return R.from_matrix(self.matrix).as_quat() + + def as_quaternion_str(self): + q = self.as_quaternion() + return f'{q[0]:f}#{q[1]:f}#{q[2]:f}#{q[3]:f}' + + @classmethod + def identity(cls): + return cls(matrix=np.eye(3)) + + +class Quaternion(BaseModel): + x: float + y: float + z: float + w: float + + @property + def as_array(self): + return np.array([self.x, self.y, self.z, self.w]) + + def as_str(self): + return f'{self.x:f}#{self.y:f}#{self.z:f}#{self.w:f}' + + def as_rotation_matrix(self): + return RotationMatrix(matrix=R.from_quat(self.as_array).as_matrix()) + + def rotate_points(self, points: List['Point']): + return [Point.rotate(pt, self.as_rotation_matrix()) for pt in points] + + @classmethod + def from_rotation_matrix(cls, rotation_matrix: RotationMatrix): + q = R.from_matrix(rotation_matrix.matrix).as_quat() + return cls(x=q[0], y=q[1], z=q[2], w=q[3]) + + @classmethod + def from_string(cls, q_str: str): + q = q_str.split('#') + return cls(x=float(q[0]), y=float(q[1]), z=float(q[2]), w=float(q[3])) + + +class Point(BaseModel): + x: float + y: float + z: float + label: Optional[str] = None + + def __repr__(self): + return f"Point({self.x}, {self.y}, {self.z})" + + @property + def as_array(self): + return np.array([self.x, self.y, self.z]) + + def get_distance_to(self, other: 'Point'): + return np.linalg.norm(self.as_array - other.as_array) + + def get_distance_to_vector(self, vector: 'Vector'): + projection = vector.projection_of_point(self) + return self.get_distance_to(projection) + + def get_mid_point_to(self, other: 'Point'): + return Point(x=(self.x+other.x)/2, y=(self.y+other.y)/2, z=(self.z+other.z)/2) + + def get_translation_vector_to(self, other: 'Point'): + return Vector(x=other.x - self.x, y=other.y - self.y, z=other.z - self.z) + + def overlaps_with(self, other: 'Point'): + return np.allclose(self.as_array, other.as_array) + + def farthest_point(self, points: List['Point']): + max_dist = 0 + max_point = None + for pt in points: + dist = self.get_distance_to(pt) + if dist > max_dist: + max_dist = dist + max_point = pt + return max_point + + def rank_distance_to_points(self, points: List['Point']): + return sorted(points, key=lambda x: self.get_distance_to(x)) + + def get_points_within_threshold_distance(self, points: List['Point'], threshold_distance: float): + return [pt for pt in points if self.get_distance_to(pt) <= threshold_distance] + + @classmethod + def mid_point(cls, pt1: 'Point', pt2: 'Point'): + return cls(x=(pt1.x + pt2.x) / 2, y=(pt1.y + pt2.y) / 2, z=(pt1.z + pt2.z) / 2) + + @classmethod + def centroid(cls, points: List['Point']): + 
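+ # NOTE returns the arithmetic mean of the x, y and z coordinates of the given points; the label of the returned Point is left unset (None)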
return cls(x=np.mean([pt.x for pt in points]), y=np.mean([pt.y for pt in points]), z=np.mean([pt.z for pt in points])) + + @classmethod + def from_array(cls, arr, label=None): + return cls(x=arr[0], y=arr[1], z=arr[2], label=label) + + @classmethod + def rotate(cls, point: 'Point', rotation: RotationMatrix): + _ = rotation.apply(point.as_array) + return cls(x=_[0], y=_[1], z=_[2], label=point.label) + + @classmethod + def scale(cls, point: 'Point', factor: float): + return cls(x=point.x * factor, y=point.y * factor, z=point.z * factor, label=point.label) + + @classmethod + def translate(cls, point: 'Point', vector: 'Vector'): + return cls(x=point.x + vector.x, y=point.y + vector.y, z=point.z + vector.z, label=point.label) + + @classmethod + def translate_points_to_target_centroid(cls, points: List['Point'], target_centroid: 'Point') -> Tuple[List['Point'], 'Vector']: + current_centroid = cls.centroid(points) + translation = current_centroid.get_translation_vector_to(target_centroid) + return [cls.translate(pt, translation) for pt in points], translation + + @classmethod + def farthest_pair(cls, points: List['Point']): + max_dist = 0 + max_points = (None, None) + for pt1, pt2 in combinations(points, 2): + dist = pt1.get_distance_to(pt2) + if dist > max_dist: + max_dist = dist + max_points = (pt1, pt2) + return max_points + + @classmethod + def closest_pair(cls, points: List['Point']): + min_dist = np.inf + min_points = (None, None) + for pt1, pt2 in combinations(points, 2): + dist = pt1.get_distance_to(pt2) + if dist < min_dist: + min_dist = dist + min_points = (pt1, pt2) + return min_points + + @classmethod + def closest_pair_across_lists(cls, point_list1: List['Point'], point_list2: List['Point']): + min_dist = np.inf + min_points = (None, None) + for pt1, pt2 in product(point_list1, point_list2): + dist = pt1.get_distance_to(pt2) + if dist < min_dist: + min_dist = dist + min_points = (pt1, pt2) + return min_points + + @classmethod + def fit_circle_2d(cls, points: List['Point']): + if len(points) < 3: + raise ValueError(f'At least 3 points are required to fit circumcenter. 
Provided points: {points}') + elif len(points) > 5: + raise NotImplementedError('Fitting circumcenter for more than 5 points is not implemented yet') + else: + # least squares solution to find the circumcenter + # first fit a plane + plane = Plane.fit_from_points(points) + # project the points to the plane + projected_points = [plane.project_point(pt) for pt in points] + # find a local 2-D coordinate system using the first two points + origin = projected_points[0] + u = Vector.from_two_points(start=origin, end=projected_points[1]) + u = u.get_unit_vector() + norm = plane.normal.get_unit_vector() + v = norm.get_cross_product(u) + v /= np.linalg.norm(v) + # project the points to the local 2-D coordinate system + local_points = [Point(x=np.dot(pt.as_array - origin.as_array, u.as_array), y=np.dot(pt.as_array - origin.as_array, v), z=0) for pt in projected_points] + local_np_arrs = np.array([pt.as_array for pt in local_points]) + local_x = local_np_arrs[:, 0] + local_y = local_np_arrs[:, 1] + a = np.column_stack((local_x, local_y, np.ones(len(local_x)))) + b = local_x**2 + local_y**2 + a_coef, b_coef, c_coef = np.linalg.lstsq(a, b, rcond=None)[0] + x_center = a_coef / 2 + y_center = b_coef / 2 + c = origin.as_array + x_center * u.as_array + y_center * v + r = np.sqrt(x_center**2 + y_center**2 + c_coef) + return cls.from_array(c), r + +class Vector(BaseModel): + x: float + y: float + z: float + + def __repr__(self): + return f"Vector({self.x}, {self.y}, {self.z})" + + def as_str(self): + return f"{self.x:f}#{self.y:f}#{self.z:f}" + + @property + def as_array(self): + return np.array([self.x, self.y, self.z]) + + @property + def magnitude(self): + return np.linalg.norm(self.as_array) + + def get_unit_vector(self): + mag = self.magnitude + return Vector(x = self.x / mag, y = self.y / mag, z = self.z / mag) + + def get_flip_vector(self): + return Vector(x = -self.x, y = -self.y, z = -self.z) + + def get_dot_product(self, other: 'Vector'): + return np.dot(self.as_array, other.as_array) + + def get_cross_product(self, other: 'Vector'): + return np.cross(self.as_array, other.as_array) + + def get_rad_angle_to(self, other: 'Vector'): + return np.arccos(np.clip(self.get_dot_product(other) / (self.magnitude * other.magnitude), -1.0, 1.0)) + + def get_deg_angle_to(self, other: 'Vector'): + return np.degrees(self.get_rad_angle_to(other)) + + def projection_of_point(self, point: Point): + vector_unit = self.get_unit_vector() + scalar_projection = np.dot(point.as_array, vector_unit.as_array) + projection = scalar_projection * vector_unit.as_array + return Point(x=projection[0], y=projection[1], z=projection[2]) + + def get_rotation_matrix_to_parallel( + self, + other: 'Vector', + flip_if_180: bool = False, + base_axis_if_180: Optional['Vector'] = None + ) -> RotationMatrix: + # TODO not elegant but works for now + v1 = self.get_unit_vector() + v2 = other.get_unit_vector() + axis = self.get_cross_product(v2) + if np.allclose(np.linalg.norm(axis), 0): # NOTE this is to avoid numerical errors + if v1.get_dot_product(v2) < 0: + if flip_if_180: + if base_axis_if_180 is not None: + return RotationMatrix(matrix=R.from_rotvec(np.pi * base_axis_if_180.get_unit_vector().as_array).as_matrix()) + else: + # TODO maybe not neccessary to flip all three directions... revisit this... 
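+ # NOTE -identity inverts all three axes (det = -1), i.e. an improper rotation rather than a true 180-degree rotation about a single axis
+ # callers that need a proper rotation in the anti-parallel case should pass base_axis_if_180 so the rotation axis is well-defined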
+ return RotationMatrix(matrix=R.identity().as_matrix() * -1) + else: + return RotationMatrix(matrix=R.identity().as_matrix()) + else: + return RotationMatrix(matrix=R.identity().as_matrix()) + axis = axis / np.linalg.norm(axis) + angle = np.arccos(np.clip(v1.get_dot_product(v2), -1.0, 1.0)) + rotation = R.from_rotvec(angle * axis) + return RotationMatrix(matrix=rotation.as_matrix()) + + @classmethod + def from_array(cls, arr): + return cls(x=arr[0], y=arr[1], z=arr[2]) + + @classmethod + def from_string(cls, string): + arr = string.split('#') + return cls(x=float(arr[0]), y=float(arr[1]), z=float(arr[2])) + + @classmethod + def from_two_points(cls, start: Point, end: Point): + return cls(x = end.x - start.x, y = end.y - start.y, z = end.z - start.z) + + @classmethod + def rotate_to_parallel(cls, vector: 'Vector', other: 'Vector'): + rotation_matrix = vector.get_rotation_matrix_to_parallel(other) + r_v = rotation_matrix.apply(vector.as_array) + return cls(x=r_v[0], y=r_v[1], z=r_v[2]) + + +class Line(BaseModel): + point: Point + direction: Vector + + def __repr__(self): + return f"Line(Point={self.point}, Direction={self.direction})" + + @classmethod + def from_two_points(cls, start: Point, end: Point): + return cls(point=start, direction=Vector.from_two_points(start, end)) + + def project_point(self, other_point: Point): + if self.is_point_on_line(other_point): + return Point(x=other_point.x, y=other_point.y, z=other_point.z) + v = Vector.from_two_points(self.point, other_point) + v_proj = (np.dot(v.as_array, self.direction.as_array) / np.dot(self.direction.as_array, self.direction.as_array)) * self.direction.as_array + return Point(x=self.point.x + v_proj[0], y=self.point.y + v_proj[1], z=self.point.z + v_proj[2]) + + def is_point_on_line(self, other: Point, threshold: float = 1e-6): + cross_product = self.direction.get_cross_product(Vector.from_two_points(self.point, other)) + return np.allclose(cross_product, [0, 0, 0]) + + def normal_vector_from_point_to_line(self, other: Point): + if self.is_point_on_line(other): + # this means the point is on the line + # so we return the normal vector of the direction of the line + # and the normal vector needs to pass through the other point + # as this is a line, we can choose any arbitrary vector that is not parallel to the line + v = self.direction.get_unit_vector().as_array + if not np.allclose(v, [1, 0, 0]): + # if v is not parallel to the x-axis + arbitrary_vector = np.array([1, 0, 0]) + else: # if v is parallel to the x-axis, choose a different arbitrary vector + arbitrary_vector = np.array([0, 1, 0]) + # cross product for normal vector + normal_vector = np.cross(v, arbitrary_vector) + normal_vector = normal_vector / np.linalg.norm(normal_vector) + return Vector(x=normal_vector[0] + other.x, y=normal_vector[1] + other.y, z=normal_vector[2] + other.z) + else: + v = Vector.from_two_points(other, self.point) + v_proj = (np.dot(v.as_array, self.direction.as_array) / np.dot(self.direction.as_array, self.direction.as_array)) * self.direction.as_array + return Vector(x=v.x - v_proj[0], y=v.y - v_proj[1], z=v.z - v_proj[2]) + + +class Plane(BaseModel): + point: Point + normal: Vector + + def __repr__(self): + return f"Plane(Point={self.point}, Normal={self.normal})" + + @property + def local_2d_coordinate_system_vectors(self): + if self.normal.x != 0 or self.normal.y != 0: + v1 = Vector(x=-self.normal.y, y=self.normal.x, z=0).get_unit_vector() + else: + v1 = Vector(x=1, y=0, z=0) + v2 = Vector.from_array(self.normal.get_cross_product(v1)) + return 
v1, v2 + + @classmethod + def from_point_and_normal(cls, point: Point, normal: Vector): + return cls(point = point, normal = normal.get_unit_vector()) + + @classmethod + def from_two_vectors(cls, vector1: Vector, vector2: Vector, point_on_plane: Point = Point(x=0, y=0, z=0), positive_ref_point: Point = None): + normal_vector = vector1.get_cross_product(vector2) + norm = normal_vector / np.linalg.norm(normal_vector) + temp_plane = cls(point=point_on_plane, normal=Vector.from_array(norm)) + # check if the reference point is on the positive side of the plane + if positive_ref_point is None or temp_plane.is_point_on_positive_side(positive_ref_point): + return temp_plane + return cls(point=point_on_plane, normal=Vector.from_array(-norm)) + + @classmethod + def from_three_points(cls, pt1: Point, pt2: Point, pt3: Point, positive_ref_point: Point = None): + v1 = Vector(x = pt2.x - pt1.x, y = pt2.y - pt1.y, z = pt2.z - pt1.z) + v2 = Vector(x = pt3.x - pt1.x, y = pt3.y - pt1.y, z = pt3.z - pt1.z) + return cls.from_two_vectors(v1, v2, pt1, positive_ref_point) + + @classmethod + def fit_from_points(cls, points: List[Point], positive_ref_point: Point = None): + n = len(points) + if n < 3: + raise ValueError(f'At least 3 points are required to fit a plane. Provided points: {points}') + elif n == 3: + return cls.from_three_points(pt1=points[0], pt2=points[1], pt3=points[2], positive_ref_point=positive_ref_point) + else: + A = np.array([[pt.x, pt.y, pt.z] for pt in points]) + centroid = np.mean(A, axis=0) + A -= centroid + _, _, V = np.linalg.svd(A) + normal = V[-1] + temp_plane = cls(point=Point.from_array(centroid), normal=Vector.from_array(normal)) + # check if the reference point is on the positive side of the plane + if positive_ref_point is None or temp_plane.is_point_on_positive_side(positive_ref_point): + return temp_plane + return cls(point=Point.from_array(centroid), normal=Vector.from_array(-normal)) + + def is_point_on_positive_side(self, other: Point): + return np.dot(other.as_array - self.point.as_array, self.normal.as_array) > 0 + + def normal_vector_from_point_to_plane(self, other: Point) -> Vector: + w = Vector.from_two_points(start=other, end=self.point) + proj = w.get_dot_product(self.normal) / self.normal.magnitude + if proj == 0: + # this means the point is on the plane + # so we return the normal vector + return Vector.from_array(self.normal.as_array) + return Vector(x=proj * self.normal.x, y=proj * self.normal.y, z=proj * self.normal.z) + + def project_point(self, other_point: Point) -> Point: + pt = other_point.as_array + plane_normal = self.normal.get_unit_vector().as_array + point_to_plane = pt - self.point.as_array + distance_to_plane = np.dot(point_to_plane, plane_normal) + projected_point = pt - distance_to_plane * plane_normal + return Point(x=projected_point[0], y=projected_point[1], z=projected_point[2]) + + def project_line(self, line: Line) -> Line: + projected_start_point = self.project_point(line.point) + projected_end_along_direction = self.project_point(Point(x=line.point.x + line.direction.x, y=line.point.y + line.direction.y, z=line.point.z + line.direction.z)) + return Line.from_two_points(projected_start_point, projected_end_along_direction) + + def get_projected_vector(self, vector: Vector) -> Vector: + unit_normal = self.normal.get_unit_vector() + return Vector.from_array(vector.as_array - vector.get_dot_product(unit_normal) * unit_normal.as_array) + + def find_perpendicular_bisector_on_plane(self, point1: Point, point2: Point) -> Line: + projected_point_1 = 
self.project_point(point1) + projected_point_2 = self.project_point(point2) + mid_point = Point.mid_point(projected_point_1, projected_point_2) + proj_line = Line.from_two_points(projected_point_1, projected_point_2) + v = self.normal.get_cross_product(proj_line.direction) + if np.linalg.norm(v) == 0: + raise ValueError(f'The two points {point1} and {point2} define a line that is perpendicular to the plane {self}. No unique perpendicular bisector exists on the plane.') + else: + v = v / np.linalg.norm(v) + return Line(point=mid_point, direction=Vector.from_array(v)) + + def find_intersection_of_lines_projected(self, line1: Line, line2: Line) -> Point: + proj_l1 = self.project_line(line1) + proj_l2 = self.project_line(line2) + a = np.array([proj_l1.direction.as_array, -proj_l2.direction.as_array]).T + b = proj_l2.point.as_array - proj_l1.point.as_array + if np.linalg.matrix_rank(a) < 2: + # the two lines are parallel + return None + t = np.linalg.lstsq(a, b, rcond=None)[0][0] + intersection = proj_l1.point.as_array + t * proj_l1.direction.as_array + return Point.from_array(intersection) + + def project_points_to_local_2d_coordinate(self, points: List['Point']): + projected_points = [self.project_point(pt) for pt in points] + v1, v2 = self.local_2d_coordinate_system_vectors + # project the points to the local 2-D coordinate system + local_points = [] + for pt in projected_points: + x = np.dot(pt.as_array - self.point.as_array, v1.as_array) + y = np.dot(pt.as_array - self.point.as_array, v2.as_array) + local_points.append(Point(x=x, y=y, z=0)) + return local_points + + def local_2d_delaunay_triangulation(self, points: List['Point']): + local_points = self.project_points_to_local_2d_coordinate(points) + delaunay = Delaunay(np.array([pt.as_array for pt in local_points])) + return delaunay diff --git a/MOPTools/twa_mops/main.py b/MOPTools/twa_mops/main.py new file mode 100644 index 00000000000..7f401e95df3 --- /dev/null +++ b/MOPTools/twa_mops/main.py @@ -0,0 +1,55 @@ +from twa.kg_operations import PySparqlClient +from twa.conf import config_generic, Config +from rdflib import Graph, URIRef +import ontomops +import ontospecies +import om +import alg2 + +class MOPsConfig(Config): + SPARQL_ENDPOINT: str + +endpoint = config_generic(MOPsConfig, env_file='./mops.env').SPARQL_ENDPOINT + +if __name__ == "__main__": + sparql_client = PySparqlClient(endpoint, endpoint) + alg2_result = sparql_client.perform_query(alg2.alg2) + # Example result: + # { + # 'organic_gbu_label': '2-linear', + # 'organic_gbu_number': '30', + # 'organic_charge': '-2.0', + # 'metal': 'https://www.theworldavatar.com/kg/ontomops/ChemicalBuildingUnit_782a50de-2db7-4910-9547-7200a723d7d3_0', + # 'organic_formula': '[(C6H4C)2(CO2)2]', + # 'metal_formula': '[WV5O11]', + # 'organic_mw': '264.2318', + # 'am': 'https://www.theworldavatar.com/kg/ontomops/AssemblyModel_4d34c0b4-2a4b-4f16-98dd-97c5ce7349a5', + # 'metal_mw': '614.54193', + # 'metal_gbu': 'https://www.theworldavatar.com/kg/ontomops/GenericBuildingUnit_f664df33-ef4a-44f8-a76f-930234465ea5_0', + # 'am_label': '(5-pyramidal)x12(2-linear)x30', + # 'am_symmetry': 'Ih', + # 'metal_gbu_number': '12', + # 'organic_gbu': 'https://www.theworldavatar.com/kg/ontomops/GenericBuildingUnit_3d71c19a-ab54-4993-8c94-267dcfe41792_1', + # 'metal_charge': '4.0', + # 'organic': 'https://www.theworldavatar.com/kg/ontomops/ChemicalBuildingUnit_ea080404-38bb-462e-a885-49f48daca41e_1', + # 'metal_gbu_label': '5-pyramidal' + # } + g = Graph() + prov = ontomops.Provenance(hasReferenceDOI='PLACEHOLDER 
DOI') + for i in range(len(alg2_result)): + row = alg2_result[i] + print(f'Instantiating {i + 1}th new MOP: {row}') + mop_mw = float(row['metal_mw']) * int(row['metal_gbu_number']) + float(row['organic_mw']) * int(row['organic_gbu_number']) + mop_charge = float(row['metal_charge']) * int(row['metal_gbu_number']) + float(row['organic_charge']) * int(row['organic_gbu_number']) + new_mop = ontomops.MetalOrganicPolyhedron( + hasAssemblyModel=row['am'], + hasChemicalBuildingUnit=[row['metal'], row['organic']], + hasCharge=ontospecies.Charge(hasValue=om.Measure(hasNumericalValue=mop_charge, hasUnit=om.elementaryCharge)), + hasMolecularWeight=ontospecies.MolecularWeight(hasValue=om.Measure(hasNumericalValue=mop_mw, hasUnit=om.gramPerMole)), + hasMOPFormula=f"{row['metal_formula']}{row['metal_gbu_number']}{row['organic_formula']}{row['organic_gbu_number']}", + hasProvenance=prov, + ) + g.add((URIRef(row['metal']), URIRef(ontomops.IsFunctioningAs.predicate_iri), URIRef(row['metal_gbu']))) + g.add((URIRef(row['organic']), URIRef(ontomops.IsFunctioningAs.predicate_iri), URIRef(row['organic_gbu']))) + ontomops.MetalOrganicPolyhedron.push_all_instances_to_kg(sparql_client, -1) + sparql_client.upload_graph(g) diff --git a/MOPTools/twa_mops/mops.env b/MOPTools/twa_mops/mops.env new file mode 100644 index 00000000000..ef695fa3b3a --- /dev/null +++ b/MOPTools/twa_mops/mops.env @@ -0,0 +1 @@ +SPARQL_ENDPOINT=http://localhost:48082/blazegraph/namespace/ontomops/sparql diff --git a/MOPTools/twa_mops/om.py b/MOPTools/twa_mops/om.py new file mode 100644 index 00000000000..f17a0c70312 --- /dev/null +++ b/MOPTools/twa_mops/om.py @@ -0,0 +1,63 @@ +from __future__ import annotations +from twa.data_model.base_ontology import BaseOntology, BaseClass, ObjectProperty, DatatypeProperty + + +# NOTE TODO this script is incomplete as it only contains necessary classes and properties for the MOPs project +# NOTE TODO a complete OGM representation for OntoSpecies is yet to be implemented +# NOTE TODO it should be moved to a place that accessible to all other ontologies +class OM(BaseOntology): + base_url = 'http://www.ontology-of-units-of-measure.org/resource/om-2/' + + +# object properties +HasValue = ObjectProperty.create_from_base('HasValue', OM) +HasUnit = ObjectProperty.create_from_base('HasUnit', OM) + + +# data properties +HasNumericalValue = DatatypeProperty.create_from_base('HasNumericalValue', OM) + + +# classes +class Unit(BaseClass): + rdfs_isDefinedBy = OM + +class SingularUnit(Unit): + rdfs_isDefinedBy = OM + +class CompoundUnit(Unit): + rdfs_isDefinedBy = OM + +class UnitDivision(CompoundUnit): + rdfs_isDefinedBy = OM + +class Quantity(BaseClass): + rdfs_isDefinedBy = OM + hasValue: HasValue[Measure] + + @property + def num_value(self): + return list(self.hasValue)[0].hasNumericalValue + + @property + def unit(self): + return list(self.hasValue)[0].hasUnit + +class Length(Quantity): + rdfs_isDefinedBy = OM + +class Radius(Length): + rdfs_isDefinedBy = OM + +class Diameter(Length): + rdfs_isDefinedBy = OM + +class Measure(BaseClass): + rdfs_isDefinedBy = OM + hasUnit: HasUnit[Unit] + hasNumericalValue: HasNumericalValue[float] + +# TODO need to find a better way for defining the units +gramPerMole = OM.namespace_iri + 'gramPerMole' +elementaryCharge = OM.namespace_iri + 'elementaryCharge' +angstrom = OM.namespace_iri + 'angstrom' diff --git a/MOPTools/twa_mops/ontomops.py b/MOPTools/twa_mops/ontomops.py new file mode 100644 index 00000000000..adf1ca50ee1 --- /dev/null +++ b/MOPTools/twa_mops/ontomops.py @@ -0,0 
+1,1059 @@ +from __future__ import annotations +from typing import Optional, Dict, List +from scipy.optimize import fsolve +import plotly.express as px +import pandas as pd +import numpy as np +import math +import json + +from twa.data_model.base_ontology import BaseOntology, BaseClass, ObjectProperty, DatatypeProperty, KnowledgeGraph +import ontospecies +import om +import cavity_and_pore_size as cap + +from geo import Point, Vector, Line, Plane, RotationMatrix, Quaternion + +BINDING_FRAGMENT_METAL = 'Metal' +BINDING_FRAGMENT_CO2 = 'CO2' +BINDING_FRAGMENT_O3 = 'O3' +BINDING_FRAGMENT_N2 = 'N2' +GBU_TYPE_2_BENT = '2-bent' +GBU_TYPE_2_LINEAR = '2-linear' +GBU_TYPE_3_PLANAR = '3-planar' +GBU_TYPE_3_PYRAMIDAL = '3-pyramidal' +GBU_TYPE_4_PLANAR = '4-planar' +GBU_TYPE_4_PYRAMIDAL = '4-pyramidal' +GBU_TYPE_5_PYRAMIDAL = '5-pyramidal' + + +class OntoMOPs(BaseOntology): + base_url = 'https://www.theworldavatar.com/kg' + namespace = 'ontomops' + owl_versionInfo = '1.1-ogm' + rdfs_comment = 'An ontology developed for representing Metal-Organic Polyhedra (MOPs). This is object graph mapper (OGM) version.' + + +# object properties +HasAssemblyModel = ObjectProperty.create_from_base('HasAssemblyModel', OntoMOPs) +HasBindingDirection = ObjectProperty.create_from_base('HasBindingDirection', OntoMOPs) +HasBindingSite = ObjectProperty.create_from_base('HasBindingSite', OntoMOPs) +HasCavity = ObjectProperty.create_from_base('HasCavity', OntoMOPs) +HasChemicalBuildingUnit = ObjectProperty.create_from_base('HasChemicalBuildingUnit', OntoMOPs) +HasGenericBuildingUnit = ObjectProperty.create_from_base('HasGenericBuildingUnit', OntoMOPs) +HasGenericBuildingUnitNumber = ObjectProperty.create_from_base('HasGenericBuildingUnitNumber', OntoMOPs) +HasPolyhedralShape = ObjectProperty.create_from_base('HasPolyhedralShape', OntoMOPs) +HasProvenance = ObjectProperty.create_from_base('HasProvenance', OntoMOPs) +IsFunctioningAs = ObjectProperty.create_from_base('IsFunctioningAs', OntoMOPs) +IsNumberOf = ObjectProperty.create_from_base('IsNumberOf', OntoMOPs) +# additions for assembler +HasGBUConnectingPoint = ObjectProperty.create_from_base('HasGBUConnectingPoint', OntoMOPs) +HasGBUCoordinateCenter = ObjectProperty.create_from_base('HasGBUCoordinateCenter', OntoMOPs) +HasCBUAssemblyCenter = ObjectProperty.create_from_base('HasCBUAssemblyCenter', OntoMOPs) +HasBindingPoint = ObjectProperty.create_from_base('HasBindingPoint', OntoMOPs) +HasGBUType = ObjectProperty.create_from_base('HasGBUType', OntoMOPs) +HasCBUAssemblyTransformation = ObjectProperty.create_from_base('HasCBUAssemblyTransformation', OntoMOPs) +Transforms = ObjectProperty.create_from_base('Transforms', OntoMOPs) +AlignsTo = ObjectProperty.create_from_base('AlignsTo', OntoMOPs) +# for pore ring +HasPoreRing = ObjectProperty.create_from_base('HasPoreRing', OntoMOPs) +IsFormedBy = ObjectProperty.create_from_base('IsFormedBy', OntoMOPs) +MeasuresPoreRing = ObjectProperty.create_from_base('MeasuresRing', OntoMOPs) +HasPoreSize = ObjectProperty.create_from_base('HasPoreSize', OntoMOPs) +HasPoreDiameter = ObjectProperty.create_from_base('HasPoreDiameter', OntoMOPs) +# for cavity +HasLargestInnerSphereDiameter = ObjectProperty.create_from_base('HasLargestInnerSphereDiameter', OntoMOPs) +HasOuterDiameter = ObjectProperty.create_from_base('HasOuterDiameter', OntoMOPs) + + +# data properties +HasCBUFormula = DatatypeProperty.create_from_base('HasCBUFormula', OntoMOPs) +HasCCDCNumber = DatatypeProperty.create_from_base('HasCCDCNumber', OntoMOPs) +HasMOPFormula = 
DatatypeProperty.create_from_base('HasMOPFormula', OntoMOPs) +HasModularity = DatatypeProperty.create_from_base('HasModularity', OntoMOPs) +HasOuterCoordinationNumber = DatatypeProperty.create_from_base('HasOuterCoordinationNumber', OntoMOPs) +HasPlanarity = DatatypeProperty.create_from_base('HasPlanarity', OntoMOPs) +HasReferenceDOI = DatatypeProperty.create_from_base('HasReferenceDOI', OntoMOPs) +HasSymbol = DatatypeProperty.create_from_base('HasSymbol', OntoMOPs) +HasSymmetryPointGroup = DatatypeProperty.create_from_base('HasSymmetryPointGroup', OntoMOPs) +HasUnitNumberValue = DatatypeProperty.create_from_base('HasUnitNumberValue', OntoMOPs) +# additions for assembler +HasBindingFragment = DatatypeProperty.create_from_base('HasBindingFragment', OntoMOPs) +HasX = DatatypeProperty.create_from_base('HasX', OntoMOPs) +HasY = DatatypeProperty.create_from_base('HasY', OntoMOPs) +HasZ = DatatypeProperty.create_from_base('HasZ', OntoMOPs) +QuaternionToRotate = DatatypeProperty.create_from_base('QuaternionToRotate', OntoMOPs) +ScaleFactorToAlignCoordinateCenter = DatatypeProperty.create_from_base('ScaleFactorToAlignCoordinateCenter', OntoMOPs) +TranslationVectorToAlignOrigin = DatatypeProperty.create_from_base('TranslationVectorToAlignOrigin', OntoMOPs) +# for pore ring +HasProbingVector = DatatypeProperty.create_from_base('HasProbingVector', OntoMOPs) + + +# classes +class MolecularCage(BaseClass): + rdfs_isDefinedBy = OntoMOPs + +class CoordinationCage(MolecularCage): + pass + +class GenericBuildingUnitType(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasModularity: HasModularity[int] + hasPlanarity: HasPlanarity[str] + + @property + def label(self): + return f"{list(self.hasModularity)[0]}-{list(self.hasPlanarity)[0]}" + +class CoordinatePoint(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasX: HasX[float] + hasY: HasY[float] + hasZ: HasZ[float] + + @property + def coordinates(self): + return Point(x=list(self.hasX)[0], y=list(self.hasY)[0], z=list(self.hasZ)[0]) + +class GenericBuildingUnit(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasGBUType: HasGBUType[GenericBuildingUnitType] + hasGBUCoordinateCenter: HasGBUCoordinateCenter[GBUCoordinateCenter] + + @property + def gbu_type(self): + return list(self.hasGBUType)[0].label + + @property + def is_4_planar(self): + return list(self.hasGBUType)[0].label == '4-planar' + + @property + def modularity(self): + return list(list(self.hasGBUType)[0].hasModularity)[0] + +class GenericBuildingUnitNumber(BaseClass): + rdfs_isDefinedBy = OntoMOPs + isNumberOf: IsNumberOf[GenericBuildingUnit] + hasUnitNumberValue: HasUnitNumberValue[int] + +class PoreRing(BaseClass): + rdfs_isDefinedBy = OntoMOPs + isFormedBy: IsFormedBy[GBUCoordinateCenter] + hasProbingVector: HasProbingVector[str] + + @property + def probing_vector(self): + vec = list(self.hasProbingVector)[0].split('#') + return Vector(x=float(vec[0]), y=float(vec[1]), z=float(vec[2])) + + @property + def pair_of_ring_forming_gbus(self): + _pairs = {} + for cc in self.isFormedBy: + cc: GBUCoordinateCenter + cps = list(cc.hasGBUConnectingPoint) + for cp in cps: + if cp.instance_iri not in _pairs: + _pairs[cp.instance_iri] = [cc] + else: + _pairs[cp.instance_iri].append(cc) + pairs = {k: v for k, v in _pairs.items() if len(v) == 2} + return pairs + +class PoreSize(BaseClass): + rdfs_isDefinedBy = OntoMOPs + measuresPoreRing: MeasuresPoreRing[PoreRing] + hasProbingVector: HasProbingVector[str] + hasPoreDiameter: HasPoreDiameter[om.Diameter] + +class AssemblyModel(BaseClass): + rdfs_isDefinedBy = OntoMOPs + 
hasGenericBuildingUnit: HasGenericBuildingUnit[GenericBuildingUnit] + hasGenericBuildingUnitNumber: HasGenericBuildingUnitNumber[GenericBuildingUnitNumber] + hasPolyhedralShape: HasPolyhedralShape[PolyhedralShape] + hasSymmetryPointGroup: HasSymmetryPointGroup[str] + hasGBUCoordinateCenter: HasGBUCoordinateCenter[GBUCoordinateCenter] + hasGBUConnectingPoint: HasGBUConnectingPoint[GBUConnectingPoint] + hasPoreRing: Optional[HasPoreRing[PoreRing]] = None + + def visualise(self): + rows = [] + for gbu in self.hasGenericBuildingUnit: + gbu: GenericBuildingUnit + for gcc in gbu.hasGBUCoordinateCenter: + gcc: GBUCoordinateCenter + rows.append([gbu.gbu_type, gcc.instance_iri, str(gcc.rdfs_comment), gcc.coordinates.x, gcc.coordinates.y, gcc.coordinates.z]) + for cp in gcc.hasGBUConnectingPoint: + cp: GBUConnectingPoint + rows.append(['ConnectingPoint', cp.instance_iri, str(cp.rdfs_comment), cp.coordinates.x, cp.coordinates.y, cp.coordinates.z]) + df = pd.DataFrame(rows, columns=['Label', 'IRI', 'Position', 'X', 'Y', 'Z']) + + fig = px.scatter_3d(df, x='X', y='Y', z='Z', color='Label', hover_data=['IRI', 'Position']) + fig.update_traces(marker=dict(size=2)) + + return fig + + @staticmethod + def process_geometry_json(am_json, gbu_type_1_label, gbu_type_2_label): + """ + Example JSON for AM (the `am_json` will be the list of dictionaries within the key of the AM label in the JSON file): + { + "(5-pyramidal)x12(2-linear)x30_Ih": [ + { + "Key": "Position_1", + "Label": "5-pyramidal", + "X": 2.6368, + "Y": 2.7551, + "Z": 1.2068, + "Neighbors": [ + { + "Key": "Position_13", + "Label": "2-linear", + "Distance": 2.1029 + }, + { + "Key": "Position_15", + "Label": "2-linear", + "Distance": 2.1029 + }, + ... + ], + "ClosestDummies": ["Dummy_1", "Dummy_2", "Dummy_3", ...] + }, + ... + { + "Key": "Dummy_1", + "Label": "Dummy", + "X": 2.8490, + "Y": 1.7266, + "Z": 1.2590, + "Positions": ["Position_13", "Position_1"] + }, + ... 
+ { + "Key": "Center", + "Label": "Center", + "X": 0.0, + "Y": 0.0, + "Z": 0.0 + } + ] + } + """ + coordinate_point_dict = {} + connecting_point_dict = {} + # handle dummy blocks `{"Label": "Dummy", ...}` + for i in range(len(am_json)): + if am_json[i]['Label'] == 'Dummy': + dummy = GBUConnectingPoint(hasX=am_json[i]['X'], hasY=am_json[i]['Y'], hasZ=am_json[i]['Z']) + connecting_point_dict[am_json[i]['Key']] = dummy + + # handle position blocks `{"Label": "5-pyramidal", ...}` + # TODO the part that getting the gbu_type_1 and gbu_type_2 can be optimised + for i in range(len(am_json)): + if am_json[i]['Label'] in [gbu_type_1_label, gbu_type_2_label]: + coord = GBUCoordinateCenter( + hasX=am_json[i]['X'], + hasY=am_json[i]['Y'], + hasZ=am_json[i]['Z'], + hasGBUConnectingPoint=[connecting_point_dict[d] for d in am_json[i]['ClosestDummies']] + ) + if am_json[i]['Label'] in coordinate_point_dict: + coordinate_point_dict[am_json[i]['Label']].append(coord) + else: + coordinate_point_dict[am_json[i]['Label']] = [coord] + + return coordinate_point_dict, connecting_point_dict + + def add_coordinates_from_json(self, am_json): + gbus = list(self.hasGenericBuildingUnit) + gbu1 = gbus[0] + gbu2 = gbus[1] + coordinate_point_dict, connecting_point_dict = self.__class__.process_geometry_json(am_json, gbu1.gbu_type, gbu2.gbu_type) + gbu1.hasGBUCoordinateCenter = coordinate_point_dict[gbu1.gbu_type] + gbu2.hasGBUCoordinateCenter = coordinate_point_dict[gbu2.gbu_type] + self.hasGBUCoordinateCenter = [c for g in list(coordinate_point_dict.values()) for c in g] + self.hasGBUConnectingPoint = list(connecting_point_dict.values()) + + @classmethod + def from_geometry_json( + cls, + am_json, + gbu_type_1: GenericBuildingUnitType, + gbu_type_2: GenericBuildingUnitType, + gbu_number_1: int, + gbu_number_2: int, + am_symmetry: str, + polyhedral_shape: str = None, + ): + + # process the AM geometry JSON + coordinate_point_dict, connecting_point_dict = cls.process_geometry_json(am_json, gbu_type_1.label, gbu_type_2.label) + + # instantiate the GBUs + gbu1 = GenericBuildingUnit(hasGBUType=gbu_type_1, hasGBUCoordinateCenter=coordinate_point_dict[gbu_type_1.label]) + gbu2 = GenericBuildingUnit(hasGBUType=gbu_type_2, hasGBUCoordinateCenter=coordinate_point_dict[gbu_type_2.label]) + + return cls( + hasGenericBuildingUnit=[gbu1, gbu2], + hasGenericBuildingUnitNumber=[ + GenericBuildingUnitNumber(isNumberOf=gbu1, hasUnitNumberValue=gbu_number_1), + GenericBuildingUnitNumber(isNumberOf=gbu2, hasUnitNumberValue=gbu_number_2)], + hasPolyhedralShape=polyhedral_shape if polyhedral_shape is not None else set(), + hasSymmetryPointGroup=am_symmetry, + hasGBUCoordinateCenter=[c for g in list(coordinate_point_dict.values()) for c in g], + hasGBUConnectingPoint=list(connecting_point_dict.values()) + ) + + def gbu_of_type(self, gbu_type): + return [gbu for gbu in self.hasGenericBuildingUnit if gbu_type == gbu.gbu_type] + + @property + def pairs_of_connected_gbus(self) -> Dict: + pairs = {} + for cc in self.hasGBUCoordinateCenter: + cc: GBUCoordinateCenter + cps = list(cc.hasGBUConnectingPoint) + for cp in cps: + if cp.instance_iri not in pairs: + pairs[cp.instance_iri] = [cc] + else: + pairs[cp.instance_iri].append(cc) + return pairs + +class Provenance(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasReferenceDOI: HasReferenceDOI[str] + +class Volume(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasValue: om.HasValue[om.Measure] + +class Cavity(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasLargestInnerSphereDiameter: 
HasLargestInnerSphereDiameter[om.Diameter] + +class PolyhedralShape(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasSymbol: HasSymbol[str] + +class BindingPoint(CoordinatePoint): + pass + +class BindingSite(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasOuterCoordinationNumber: HasOuterCoordinationNumber[int] + hasBindingPoint: HasBindingPoint[BindingPoint] + hasBindingFragment: HasBindingFragment[str] + temporarily_blocked: Optional[bool] = False + + @property + def binding_coordinates(self) -> Point: + return list(self.hasBindingPoint)[0].coordinates + + def binding_atoms(self, atoms: List[Point]) -> List[Point]: + # count the atoms + def get_atom_count(s): + atom_counts = {} + i = 0 + while i < len(s): + if s[i].isupper(): + name = s[i] + i += 1 + # collect lowercase characters to form the full name + while i < len(s) and s[i].islower(): + name += s[i] + i += 1 + # collect digits to count the atoms (if any) + count = 1 + num = '' + while i < len(s) and s[i].isdigit(): + num += s[i] + i += 1 + count = int(num) if num else 1 + # store the count of atoms + atom_counts[name] = atom_counts.get(name, 0) + count + else: + i += 1 + return atom_counts + + counts = get_atom_count(list(self.hasBindingFragment)[0]) + # binding fragments situations: CO2, O, N2, single metal, multiple metals + # find the centroid of the relevant atoms that are close + sorted_atoms = self.binding_coordinates.rank_distance_to_points(atoms) + all_binding_atoms = [] + for atom, num in counts.items(): + all_binding_atoms.extend([a for a in sorted_atoms if a.label == atom][:num]) + return all_binding_atoms + + @staticmethod + def compute_assembly_center_from_binding_sites( + lst_binding_sites: List['BindingSite'], + atom_points: List['Point'], + gbu_type: str, + binding_fragment: str, + ) -> 'CBUAssemblyCenter': + # compute the CBU assembly center, which is the geometry (coordinates) center point of the CBU structure (average of all atoms) + # projected on the normal vector of the plane formed by the binding sites (dummy atoms) that pass through the circumcenter point of all binding sites + lst_binding_points = [bs.binding_coordinates for bs in lst_binding_sites] + if len(lst_binding_points) > 2: + # when the number of binding sites are greater than 2 + # find the plane from the binding sites and the circumcenter of the binding sites + # then project the center of the CBU on the normal vector of the plane + # to find the assembly center of the CBU + # even the assembly center is overshooting from the molecule + # the sin/cos calculations will make sure its coordinates is transformed correctly + cbu_binding_sites_plane = Plane.fit_from_points(lst_binding_points) + cbu_binding_sites_circumcenter = Point.fit_circle_2d(lst_binding_points)[0] + cbu_geo_center = Point.centroid(atom_points) + line = Line(point=cbu_binding_sites_circumcenter, direction=cbu_binding_sites_plane.normal) + cbu_assemb_center = line.project_point(cbu_geo_center) + else: + bindingsite_mid_point = Point.mid_point(*lst_binding_points) + if 'linear' in gbu_type.lower(): + # if the CBU is expected to function as 2-linear, take the average of the two binding sites + cbu_assemb_center = bindingsite_mid_point + else: + # if the CBU is expected to function as 2-bent, then compute the intersection of the two vectors + # to be used as the third point to fit a plane with the two binding sites + if binding_fragment == BINDING_FRAGMENT_CO2: + # first find the closest two O atoms and the C atoms for each binding sites + v_dct = {} + lst_pts_for_plane = [] + for bs in 
lst_binding_sites: + bp: Point = bs.binding_coordinates + ranked_atoms = bp.rank_distance_to_points(atom_points) + closest_two_O = [a for a in ranked_atoms if a.label == 'O'][:2] + closest_C = [a for a in ranked_atoms if a.label == 'C'][0] + # find the average of the O atoms and use it to form line with the closest C atom + avg_O = Point.mid_point(*closest_two_O) + v_dct[bs.instance_iri] = {'avg_O': avg_O, 'closest_C': closest_C, 'line': Line.from_two_points(start=avg_O, end=closest_C)} + lst_pts_for_plane.extend([closest_C, avg_O]) + # find the plane from these points + plane = Plane.fit_from_points(lst_pts_for_plane) + # find the intersection of these lines when they are projected on the plane + proj_lines = [v['line'] for v in v_dct.values()] + intersection = plane.find_intersection_of_lines_projected(*proj_lines) + # use the intersection and two binding site to form the plane + plane_of_bs = Plane.from_three_points(pt1=intersection, pt2=lst_binding_points[0], pt3=lst_binding_points[1]) + perpendicular_bisector = plane_of_bs.find_perpendicular_bisector_on_plane(*lst_binding_points) + cbu_assemb_center = perpendicular_bisector.project_point(intersection) + else: + raise NotImplementedError(f'CBUs functioning as a {gbu_type} with binding fragment {binding_fragment} is not yet supported.') + return CBUAssemblyCenter(hasX=cbu_assemb_center.x, hasY=cbu_assemb_center.y, hasZ=cbu_assemb_center.z) + +class MetalSite(BindingSite): + pass + +class OrganicSite(BindingSite): + pass + +class BindingDirection(BaseClass): + rdfs_isDefinedBy = OntoMOPs + +class DirectBinding(BindingDirection): + pass + +class SidewayBinding(BindingDirection): + pass + +class GBUConnectingPoint(CoordinatePoint): + pass + +class GBUCoordinateCenter(CoordinatePoint): + hasGBUConnectingPoint: HasGBUConnectingPoint[GBUConnectingPoint] + + @property + def vector_from_am_center(self) -> Vector: + # NOTE TODO this assumes that the center of the AM is the (0, 0, 0) point + return Vector.from_two_points(start=Point(x=0, y=0, z=0), end=self.coordinates) + + @property + def distance_to_am_center(self): + return self.coordinates.get_distance_to(Point(x=0, y=0, z=0)) + + @property + def vector_to_connecting_point_plane(self): + # TODO add safeguards for str IRIs + connecting_points = [p.coordinates for p in self.hasGBUConnectingPoint] + if len(connecting_points) < 3: + line = Line.from_two_points(start=connecting_points[0], end=connecting_points[1]) + if line.is_point_on_line(self.coordinates): + # if the center point is on the line then we take the normal vector of the line + # the normal vector has to be on the plane formed by the connecting points and the AM center point + # i.e. 
Point(0, 0, 0) + # TODO check if it is safe to make this assumption for all CBUs + _v = line.normal_vector_from_point_to_line(Point(x=0, y=0, z=0)) + # flip it to point in the direction of center + v = Vector.from_array(-_v.as_array) + else: + v = line.normal_vector_from_point_to_line(self.coordinates) + else: + plane = Plane.fit_from_points([bs for bs in connecting_points], Point(x=0, y=0, z=0)) + v = plane.normal + return v + + @property + def vector_to_farthest_connecting_point(self): + # find the plane perpendicular to the vector to the average connecting point + plane = Plane.from_point_and_normal(self.coordinates, self.vector_to_connecting_point_plane) + # project all connecting points onto the plane + projected_points = [plane.project_point(p.coordinates) for p in self.hasGBUConnectingPoint] + # find the farthest connecting point and construct a vector from center to it + farthest_projected_point = self.coordinates.farthest_point(projected_points) + vector = Vector.from_two_points(start=self.coordinates, end=farthest_projected_point) + return vector + + @property + def vector_to_shortest_side(self): + # find the plane perpendicular to the vector to the average connecting point + plane = Plane.from_point_and_normal(self.coordinates, self.vector_to_connecting_point_plane) + # project all connecting points onto the plane + projected_points = [plane.project_point(p.coordinates) for p in self.hasGBUConnectingPoint] + # find the closest pair of connecting points and construct a vector connecting the center to the line connecting them + closest_pair = Point.closest_pair(projected_points) + line = Line.from_two_points(start=closest_pair[0], end=closest_pair[1]) + v = line.normal_vector_from_point_to_line(self.coordinates) + return v + +class CBUAssemblyCenter(CoordinatePoint): + pass + +class ChemicalBuildingUnit(BaseClass): + rdfs_isDefinedBy = OntoMOPs + hasBindingDirection: HasBindingDirection[BindingDirection] + hasBindingSite: HasBindingSite[BindingSite] + isFunctioningAs: IsFunctioningAs[GenericBuildingUnit] + hasCharge: ontospecies.HasCharge[ontospecies.Charge] + hasMolecularWeight: ontospecies.HasMolecularWeight[ontospecies.MolecularWeight] + hasGeometry: ontospecies.HasGeometry[ontospecies.Geometry] + hasCBUFormula: HasCBUFormula[str] + hasCBUAssemblyCenter: HasCBUAssemblyCenter[CBUAssemblyCenter] + + @property + def charge(self): + return list(list(list(self.hasCharge)[0].hasValue)[0].hasNumericalValue)[0] + + @property + def molecular_weight(self): + return list(list(list(self.hasMolecularWeight)[0].hasValue)[0].hasNumericalValue)[0] + + @property + def cbu_formula(self): + return f"[{list(self.hasCBUFormula)[0].rstrip(']').lstrip('[')}]" + + @property + def assembly_center(self): + return list(self.hasCBUAssemblyCenter)[0].coordinates + + @property + def highest_modularity_gbu(self): + return max([gbu for gbu in list(self.isFunctioningAs)], key=lambda x: x.modularity) + + @property + def is_metal_cbu(self): + return all([isinstance(bs, MetalSite) for bs in list(self.hasBindingSite)]) + + @property + def active_binding_sites(self): + return [bs for bs in list(self.hasBindingSite) if not bs.temporarily_blocked] + + def allocate_active_binding_sites(self, num: int): + for bs in list(self.hasBindingSite)[num:]: + bs.temporarily_blocked = True + + def release_blocked_binding_sites(self): + for bs in self.hasBindingSite: + bs.temporarily_blocked = False + + def load_geometry_from_fileserver(self, sparql_client): + return 
list(self.hasGeometry)[0].load_xyz_from_geometry_file(sparql_client) + + def add_binding_site_and_assembly_center_from_json( + self, cbu_json_fpath, ocn, binding_fragment: str, gbu_type: str, metal_site: bool = False + ): + with open(cbu_json_fpath, "r") as file: + cbu_json = json.load(file) + binding_sides, assemb_center, atom_points = self.__class__.process_geometry_json( + cbu_json, ocn, binding_fragment, gbu_type, metal_site) + self.hasBindingSite = binding_sides + self.hasCBUAssemblyCenter = assemb_center + + @staticmethod + def process_geometry_json(cbu_json, ocn: int, binding_fragment: str, gbu_type: str, metal_site: bool = False): + """ + Example of CBU json consisting of real atoms, dummy atoms as binding sites, and an optional "CENTER" point. + Note that below coordinates are just for demonstration purpose, they do not represent any actual molecules. + { + "atom_uuid": {"atom": "C", "coordinate_x": 0.0, "coordinate_y": 0.0, "coordinate_z": 0.0}, + ... + "dummy_uuid": {"atom": "X", "coordinate_x": -0.1, "coordinate_y": 2.1, "coordinate_z": 5.3}, + "CENTER": {"atom": "CENTER", "coordinate_x": 0.0, "coordinate_y": 0.0, "coordinate_z": 0.0} + } + """ + cbu_binding_points = {} + lst_binding_sites = [] + cbu_atoms = {} + atom_points = [] + cbu_atoms_acc_x = 0.0 + cbu_atoms_acc_y = 0.0 + cbu_atoms_acc_z = 0.0 + + # iterate through the json file and process the coordinates + _bs_clz = MetalSite if metal_site else OrganicSite + for k, v in cbu_json.items(): + if v['atom'] == 'X': + pt = _bs_clz( + hasOuterCoordinationNumber=ocn, + hasBindingPoint=BindingPoint(hasX=v['coordinate_x'], hasY=v['coordinate_y'], hasZ=v['coordinate_z']), + hasBindingFragment=binding_fragment, + ) + cbu_binding_points[k] = pt + lst_binding_sites.append(pt) + elif str(v['atom']).lower() == 'center': + print('NOTE!!! Center point is not used in the current implementation.') + else: + pt = Point(x=v['coordinate_x'], y=v['coordinate_y'], z=v['coordinate_z'], label=v['atom']) + cbu_atoms_acc_x += v['coordinate_x'] + cbu_atoms_acc_y += v['coordinate_y'] + cbu_atoms_acc_z += v['coordinate_z'] + cbu_atoms[k] = pt + atom_points.append(pt) + + assemb_center = BindingSite.compute_assembly_center_from_binding_sites(lst_binding_sites, atom_points, gbu_type, binding_fragment) + + return lst_binding_sites, assemb_center, atom_points + + @classmethod + def from_geometry_json(cls, cbu_formula, cbu_json, charge, ocn, binding_fragment, gbu_type, gbu: str = None, direct_binding: bool = True, metal_site: bool = False): + """ + Example of CBU json consisting of real atoms, dummy atoms as binding sites, and an optional "CENTER" point. + Note that below coordinates are just for demonstration purpose, they do not represent any actual molecules. + { + "atom_uuid": {"atom": "C", "coordinate_x": 0.0, "coordinate_y": 0.0, "coordinate_z": 0.0}, + ... + "dummy_uuid": {"atom": "X", "coordinate_x": -0.1, "coordinate_y": 2.1, "coordinate_z": 5.3}, + "CENTER": {"atom": "CENTER", "coordinate_x": 0.0, "coordinate_y": 0.0, "coordinate_z": 0.0} + } + """ + + if not direct_binding: + raise NotImplementedError("Non-direct binding, e.g. 
side binding, is not yet supported.") + + binding_sides, assemb_center, atom_points = cls.process_geometry_json(cbu_json, ocn, binding_fragment, gbu_type, metal_site) + # prepare the geometry of the CBU + cbu_iri = cls.init_instance_iri() + cbu_xyz_file = f"{cbu_iri.split('/')[-1]}.xyz" + cbu_geo = ontospecies.Geometry.from_points(atom_points, cbu_xyz_file) + + # instantiate actual CBU + return cls( + instance_iri=cbu_iri, + # TODO hasBindingDirection should be modified once side-binding is implemented + hasBindingDirection='https://www.theworldavatar.com/kg/ontomops/DirectBinding_f3716525-0a8d-430f-ae24-0a043ec0c93a', + hasBindingSite=binding_sides, + isFunctioningAs=gbu if gbu is not None else set(), + hasCharge=ontospecies.Charge(hasValue=om.Measure(hasNumericalValue=charge, hasUnit=om.elementaryCharge)), + hasMolecularWeight=ontospecies.MolecularWeight.from_xyz_file(cbu_xyz_file), + hasGeometry=cbu_geo, + hasCBUFormula=cbu_formula, + hasCBUAssemblyCenter=assemb_center + ) + + @property + def vector_to_binding_site_plane(self): + # get the center of the CBU + center = list(self.hasCBUAssemblyCenter)[0].coordinates + # get the line or plane of the binding sites + binding_sites: List[BindingSite] = self.active_binding_sites + if len(binding_sites) < 3: + line = Line.from_two_points(start=binding_sites[0].binding_coordinates, end=binding_sites[1].binding_coordinates) + if line.is_point_on_line(center): + # if the center point is on the line then we approximate the plane fitted by the binding fragments of CBU + # to each binding site and we take the normal vector of the plane + lst_binding_atoms = [] + for bs in binding_sites: + lst_binding_atoms.extend(bs.binding_atoms(list(self.hasGeometry)[0].hasPoints)) + plane = Plane.fit_from_points(lst_binding_atoms) + v = plane.normal_vector_from_point_to_plane(center) + else: + v = line.normal_vector_from_point_to_line(center) + else: + plane = Plane.fit_from_points([bs.binding_coordinates for bs in binding_sites]) + v = plane.normal_vector_from_point_to_plane(center) + return v + + @property + def vector_to_farthest_binding_site(self): + # NOTE this needs to be after the rotation + # find the plane perpendicular to the vector to the average connecting point + center = list(self.hasCBUAssemblyCenter)[0].coordinates + plane = Plane.from_point_and_normal(center, self.vector_to_binding_site_plane) + # project all connecting points onto the plane + projected_points = [plane.project_point(bs.binding_coordinates) for bs in self.active_binding_sites] + # find the farthest binding site and construct a vector connecting the center to it + farthest_projected_point = center.farthest_point(projected_points) + vector = Vector.from_two_points(start=center, end=farthest_projected_point) + return vector + + @property + def vector_to_shortest_side(self): + # NOTE this needs to be after the rotation + # find the plane perpendicular to the vector to the average connecting point + center = list(self.hasCBUAssemblyCenter)[0].coordinates + plane = Plane.from_point_and_normal(center, self.vector_to_binding_site_plane) + # project all connecting points onto the plane + projected_points = [plane.project_point(bs.binding_coordinates) for bs in self.active_binding_sites] + # find the closest pair of binding sites and construct a vector connecting the center to the line connecting them + closest_pair = Point.closest_pair(projected_points) + line = Line.from_two_points(start=closest_pair[0], end=closest_pair[1]) + v = line.normal_vector_from_point_to_line(center) + return v + 
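+ # NOTE the orientation vectors above (vector_to_binding_site_plane, vector_to_farthest_binding_site, vector_to_shortest_side)
+ # mirror vector_to_connecting_point_plane, vector_to_farthest_connecting_point and vector_to_shortest_side on GBUCoordinateCenter;
+ # MetalOrganicPolyhedron.from_assemble aligns each CBU vector with its GBU counterpart (the shortest-side pair for 4-planar GBUs,
+ # the farthest-point pair otherwise) to derive the rotation matrices that orient the CBU within the assembly model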
+ def vector_of_most_possible_binding_site(self, gbu_plane: Plane, rotation_matrices: List[RotationMatrix] = [RotationMatrix.identity()]): + # find the binding site that is most likely to bind with the other GBU + # this is the one that has the smallest angle to the plane of the two GBU coordinate centers + center: Point = list(self.hasCBUAssemblyCenter)[0].coordinates + # rotate the binding sites with the rotation matrix + most_possible_binding_site_angle = 90 # in degrees + rotated_binding_vector = None + length_center_to_binding = None + for bs in self.active_binding_sites: + bs: BindingSite + v = Vector.from_two_points(start=center, end=bs.binding_coordinates) + # rotate the vector to align with the GBU coordinate center + for r in rotation_matrices: + v = Vector.from_array(r.apply(v.as_array)) + angle = abs(90 - v.get_deg_angle_to(gbu_plane.normal)) + if angle < most_possible_binding_site_angle: + rotated_binding_vector = v + most_possible_binding_site_angle = angle + # NOTE we are not rotating atoms here as we are computing the distance which are not changed before/after rotation + # and we are using the original coordinates of the binding site, therefore the original coordinates of atoms would work + binding_atoms = bs.binding_atoms(list(self.hasGeometry)[0].hasPoints) + length_center_to_binding_atoms = center.get_distance_to(Point.centroid(binding_atoms)) + + # NOTE here we are adjusting the length from center to binding fragment (atoms) to account for the half bond length (covalent radius) + # although some of the binding site in the initial data already has additional length + # i.e. the dummy atom already away from the actually binding atoms + # we will compute on-the-fly to determine the suitable side length to be added + # beyond the distance between assembly center to the binding atoms + # NOTE TODO here we take the shortcut that we take the minimal covalent radius of the binding atoms as the half bond length + # NOTE TODO to improve it, one might want to also consider the angle between the two sets of binding sites + # NOTE TODO as the actual bond formed will be a projection of such added length + # NOTE it's generally easier to optimise the geometry if the molecules are too far compared to overlapping (see SI of 10.1021/jp507643v) + # NOTE so one could potentially add more distance to it to ensure there's no overlapping + length_center_to_binding = length_center_to_binding_atoms + min([cap.PERIODIC_TABLE.GetRcovalent(a.label) for a in binding_atoms]) + return rotated_binding_vector, most_possible_binding_site_angle, length_center_to_binding + + def visualise(self, sparql_client = None): + rows = [] + if list(self.hasGeometry)[0].hasPoints is None: + if sparql_client is None: + raise ValueError('SPARQL client is required to visualise/load the geometry') + self.load_geometry_from_fileserver(sparql_client) + # atoms + for pt in list(self.hasGeometry)[0].hasPoints: + rows.append([pt.label, pt.x, pt.y, pt.z]) + # binding sites + for pt in list(self.hasBindingSite): + rows.append(['BindingSite', pt.binding_coordinates.x, pt.binding_coordinates.y, pt.binding_coordinates.z]) + # assembly center + rows.append(['AssemblyCenter', self.assembly_center.x, self.assembly_center.y, self.assembly_center.z]) + df = pd.DataFrame(rows, columns=['Atom', 'X', 'Y', 'Z',]) + fig = px.scatter_3d(df, x='X', y='Y', z='Z', color='Atom', title=f'CBU: {list(self.hasCBUFormula)[0]}') + fig.update_traces(marker=dict(size=2)) + fig.update_layout(autosize=False, width=1200, height=400) + fig.show() + return 
fig + + +class CBUAssemblyTransformation(BaseClass): + rdfs_isDefinedBy = OntoMOPs + transforms: Transforms[ChemicalBuildingUnit] + alignsTo: AlignsTo[GBUCoordinateCenter] + quaternionToRotate: QuaternionToRotate[str] + scaleFactorToAlignCoordinateCenter: ScaleFactorToAlignCoordinateCenter[float] + translationVectorToAlignOrigin: TranslationVectorToAlignOrigin[str] + + @property + def transformed_binding_sites(self): + cbu = list(self.transforms)[0] + if isinstance(cbu, str): + cbu: ChemicalBuildingUnit = KnowledgeGraph.get_object_from_lookup(cbu) + gcc = list(self.alignsTo)[0] + if isinstance(gcc, str): + gcc: GBUCoordinateCenter = KnowledgeGraph.get_object_from_lookup(gcc) + # retrieve the active binding sites + bs_points = [bs.binding_coordinates for bs in cbu.active_binding_sites] + # rotate according to the quaternion + rotation_matrix = Quaternion.from_string(list(self.quaternionToRotate)[0]).as_rotation_matrix() + rotated_bs_points = [Point.from_array(rotation_matrix.apply(bs.as_array)) for bs in bs_points] + # scale the rotated binding sites + scaling_factor = list(self.scaleFactorToAlignCoordinateCenter)[0] + translate_vector_for_scaling = Point.from_array( + rotation_matrix.apply(list(cbu.hasCBUAssemblyCenter)[0].coordinates.as_array) + ).get_translation_vector_to(Point.scale(gcc.coordinates, scaling_factor)) + scaled_bs_points = [Point.translate(bs, translate_vector_for_scaling) for bs in rotated_bs_points] + # translation for the final alignment to minimise numerical errors + translation_vector = Vector.from_string(list(self.translationVectorToAlignOrigin)[0]) + translated_bs_points = [Point.translate(bs, translation_vector) for bs in scaled_bs_points] + return {gcc: translated_bs_points} + + +class MetalOrganicPolyhedron(CoordinationCage): + hasAssemblyModel: HasAssemblyModel[AssemblyModel] + hasCavity: Optional[HasCavity[Cavity]] = None + hasChemicalBuildingUnit: HasChemicalBuildingUnit[ChemicalBuildingUnit] + hasProvenance: HasProvenance[Provenance] + hasCharge: ontospecies.HasCharge[ontospecies.Charge] + hasMolecularWeight: ontospecies.HasMolecularWeight[ontospecies.MolecularWeight] + hasGeometry: Optional[ontospecies.HasGeometry[ontospecies.Geometry]] = None + hasCCDCNumber: Optional[HasCCDCNumber[str]] = None + hasCBUAssemblyTransformation: Optional[HasCBUAssemblyTransformation[CBUAssemblyTransformation]] = None + hasMOPFormula: HasMOPFormula[str] + hasPoreSize: Optional[HasPoreSize[PoreSize]] = None + hasOuterDiameter: Optional[HasOuterDiameter[om.Diameter]] = None + + @classmethod + def from_assemble( + cls, + am: AssemblyModel, + lst_cbu: List[ChemicalBuildingUnit], + prov: Provenance = set(), + ccdc: str = set(), + sparql_client = None, + upload_geometry: bool = False, + ): + # prepare the variables + mop_charge = 0 + mop_mw = 0 + mop_formula = '' + + # locate the GBUs to build the AM + gbus = list(am.hasGenericBuildingUnit) + # place the CBUs according to the GBUs + cbu_rotation_matrix = {} + for cbu in lst_cbu: + cbu: ChemicalBuildingUnit + if list(cbu.hasGeometry)[0].hasPoints is None: + if sparql_client is None: + raise ValueError('SPARQL client is required to visualise/load the geometry') + list(cbu.hasGeometry)[0].load_xyz_from_geometry_file(sparql_client) + gbu = cbu.isFunctioningAs.intersection(gbus) + if len(gbu) == 0: + raise ValueError(f'No GBU found for CBU {cbu.instance_iri} in AM {am.instance_iri}') + elif len(gbu) > 1: + raise ValueError(f'Multiple GBUs found for CBU {cbu.instance_iri} in AM {am.instance_iri}: {gbu}') + else: + gbu = gbu.pop() + if 
type(gbu) is not GenericBuildingUnit:
+ gbu: GenericBuildingUnit = KnowledgeGraph.get_object_from_lookup(gbu)
+ # NOTE here we need to block the binding sites based on the GBU
+ cbu.allocate_active_binding_sites(gbu.modularity)
+ # TODO optimise below
+ # rotate the CBU to match the GBU
+ # rotate the vector from center to binding site plane of CBU to the vector from center to connecting point plane of GBU
+ rotation_matrix_1 = {
+ gbu_center.instance_iri: cbu.vector_to_binding_site_plane.get_rotation_matrix_to_parallel(
+ gbu_center.vector_to_connecting_point_plane, flip_if_180=True) for gbu_center in gbu.hasGBUCoordinateCenter
+ }
+ # rotate the normal vector of the line connecting the farthest pair of binding sites of CBU to the same vector of GBU
+ # NOTE that here we are getting the rotation matrix for the vector that is already rotated
+ rotation_matrix_2 = {}
+ for gbu_center in gbu.hasGBUCoordinateCenter:
+ gbu_center: GBUCoordinateCenter
+ if gbu.is_4_planar:
+ second_vector_for_alignment_cbu = cbu.vector_to_shortest_side
+ second_vector_for_alignment_gbu = gbu_center.vector_to_shortest_side
+ else:
+ second_vector_for_alignment_cbu = cbu.vector_to_farthest_binding_site
+ second_vector_for_alignment_gbu = gbu_center.vector_to_farthest_connecting_point
+ rotated = rotation_matrix_1[gbu_center.instance_iri].apply(second_vector_for_alignment_cbu.as_array)
+ rotated_cbu_center_to_binding_site_plane = Vector.from_array(rotation_matrix_1[gbu_center.instance_iri].apply(cbu.vector_to_binding_site_plane.as_array))
+ rotation_matrix_2[gbu_center.instance_iri] = Vector.from_array(rotated).get_rotation_matrix_to_parallel(
+ second_vector_for_alignment_gbu, flip_if_180=True, base_axis_if_180=rotated_cbu_center_to_binding_site_plane)
+ # put the two rotation matrices together
+ cbu_rotation_matrix[cbu.instance_iri] = {
+ gbu_center.instance_iri: [
+ rotation_matrix_1[gbu_center.instance_iri], rotation_matrix_2[gbu_center.instance_iri]
+ ] for gbu_center in gbu.hasGBUCoordinateCenter
+ }
+ # calculate the charge and molecular weight of the MOP
+ mop_charge += cbu.charge * len(gbu.hasGBUCoordinateCenter)
+ mop_mw += cbu.molecular_weight * len(gbu.hasGBUCoordinateCenter)
+ # prepare the mop_formula
+ mop_formula += f'{cbu.cbu_formula}{len(gbu.hasGBUCoordinateCenter)}'
+
+ # find any connecting point and the GBU coordinate centers at its two ends
+ pair_gbu_center = list(am.pairs_of_connected_gbus.values())[0]
+ gc1: GBUCoordinateCenter = pair_gbu_center[0]
+ gc2: GBUCoordinateCenter = pair_gbu_center[1]
+ v_gbu1: Vector = gc1.vector_from_am_center
+ v_gbu2: Vector = gc2.vector_from_am_center
+ # calculate the angle between the two GBU vectors and the plane these two vectors form
+ theta_rad = v_gbu1.get_rad_angle_to(v_gbu2)
+ plane = Plane.from_two_vectors(v_gbu1, v_gbu2)
+ # get the projection of the rotated CBU vectors (from coordinate center to binding site) onto the plane
+ for cbu_iri, rm_to_gbu in cbu_rotation_matrix.items():
+ # TODO optimise the below
+ cbu = KnowledgeGraph.get_object_from_lookup(cbu_iri)
+ if gc1.instance_iri in rm_to_gbu:
+ # get the rotation matrix for the CBU, for the first GBU
+ rm = rm_to_gbu[gc1.instance_iri]
+ rotated_binding_vector_cbu1, vector_plane_angle_cbu1, adjusted_side_length_cbu1 = cbu.vector_of_most_possible_binding_site(plane, rm)
+ projected_adjusted_side_length_cbu1 = adjusted_side_length_cbu1 * np.cos(np.deg2rad(vector_plane_angle_cbu1))
+ projected_binding_vector_cbu1 = plane.get_projected_vector(rotated_binding_vector_cbu1)
+ _gbu_projected_cbu_angle_cbu1 = 
projected_binding_vector_cbu1.get_rad_angle_to(v_gbu1)
+ # NOTE the projected angle is the outer angle so we need to subtract it from pi
+ gbu_projected_cbu_angle_cbu1 = np.pi - _gbu_projected_cbu_angle_cbu1
+ l_vertical_cbu1 = projected_adjusted_side_length_cbu1 * np.sin(gbu_projected_cbu_angle_cbu1)
+ distance_to_am_center_gbu1 = gc1.distance_to_am_center
+ elif gc2.instance_iri in rm_to_gbu:
+ # get the rotation matrix for the CBU, for the second GBU
+ rm = rm_to_gbu[gc2.instance_iri]
+ rotated_binding_vector_cbu2, vector_plane_angle_cbu2, adjusted_side_length_cbu2 = cbu.vector_of_most_possible_binding_site(plane, rm)
+ projected_adjusted_side_length_cbu2 = adjusted_side_length_cbu2 * np.cos(np.deg2rad(vector_plane_angle_cbu2))
+ projected_binding_vector_cbu2 = plane.get_projected_vector(rotated_binding_vector_cbu2)
+ _gbu_projected_cbu_angle_cbu2 = projected_binding_vector_cbu2.get_rad_angle_to(v_gbu2)
+ # NOTE the projected angle is the outer angle so we need to subtract it from pi
+ gbu_projected_cbu_angle_cbu2 = np.pi - _gbu_projected_cbu_angle_cbu2
+ l_vertical_cbu2 = projected_adjusted_side_length_cbu2 * np.sin(gbu_projected_cbu_angle_cbu2)
+ distance_to_am_center_gbu2 = gc2.distance_to_am_center
+ else:
+ raise ValueError(f'No rotation matrix found for CBU {cbu.instance_iri} in AM {am.instance_iri}')
+ # equation to solve: sin(omega)/l_vertical_cbu1 = sin(theta-omega)/l_vertical_cbu2
+ initial_guess = theta_rad / 2
+ omega = fsolve(lambda x: np.sin(x)/l_vertical_cbu1 - np.sin(theta_rad - x)/l_vertical_cbu2, initial_guess)
+ shared_side = l_vertical_cbu1 / np.sin(omega)
+ scaled_cbu1 = math.sqrt(projected_adjusted_side_length_cbu1**2 + shared_side**2 - 2*projected_adjusted_side_length_cbu1*shared_side*np.cos(np.pi - gbu_projected_cbu_angle_cbu1 - omega))
+ scaled_cbu2 = math.sqrt(projected_adjusted_side_length_cbu2**2 + shared_side**2 - 2*projected_adjusted_side_length_cbu2*shared_side*np.cos(np.pi - gbu_projected_cbu_angle_cbu2 - (theta_rad - omega)))
+ # calculate the scaling factor for the CBU
+ scaling_factor_cbu1 = scaled_cbu1 / distance_to_am_center_gbu1
+ scaling_factor_cbu2 = scaled_cbu2 / distance_to_am_center_gbu2
+
+ # apply all rotations to the CBU
+ # rotate the CBU to be parallel to the GBU
+ cbu_translated = {}
+ cbu_translation_vector = {}
+ for cbu_iri, rm_to_gbu in cbu_rotation_matrix.items():
+ # TODO optimise the below
+ cbu = KnowledgeGraph.get_object_from_lookup(cbu_iri)
+ if list(cbu.hasGeometry)[0].hasPoints is None:
+ if sparql_client is None:
+ raise ValueError('SPARQL client is required to load the geometry')
+ cbu.load_geometry_from_fileserver(sparql_client)
+ dct_rotated = {
+ gc: [Point.from_array(rm_to_gbu[gc][1].apply(rm_to_gbu[gc][0].apply(pt.as_array)), label=pt.label) for pt in list(cbu.hasGeometry)[0].hasPoints] for gc in rm_to_gbu
+ }
+ # find the translation vector from the CBU center to the GBU center
+ # note that the GBU center needs to be the scaled version of the GBU center
+ dct_translation_vector = {
+ gc: Point.from_array(
+ rm_to_gbu[gc][1].apply(rm_to_gbu[gc][0].apply(list(cbu.hasCBUAssemblyCenter)[0].coordinates.as_array))
+ ).get_translation_vector_to(
+ Point.scale(
+ KnowledgeGraph.get_object_from_lookup(gc).coordinates,
+ scaling_factor_cbu1 if gc1.instance_iri in rm_to_gbu else scaling_factor_cbu2
+ )
+ ) for gc in rm_to_gbu
+ }
+ # translate the CBU to the GBU
+ dct_translated = {
+ gc: [Point.translate(pt, dct_translation_vector[gc]) for pt in dct_rotated[gc]] for gc in rm_to_gbu
+ }
+ cbu_translated[cbu_iri] = dct_translated
+ 
cbu_translation_vector[cbu_iri] = dct_translation_vector + + # shift all atoms to have center at (0, 0, 0) + # this makes sure the numerical error is minimised + _lst_points = [] + for cbu, gcc_pts in cbu_translated.items(): + for gcc, pts in gcc_pts.items(): + _lst_points.extend(pts) + _atoms = [p for p in _lst_points if p.label.lower() not in ['x', 'center']] + adjusted_atoms, translation_vector_to_origin = Point.translate_points_to_target_centroid(_atoms, Point.from_array([0, 0, 0])) + + # collect information on the CBU transformation + cbu_assembly_transformation_lst = [] + cbu_binding_sites_transformation_dct = {} + for cbu_iri, rm_to_gbu in cbu_rotation_matrix.items(): + for gc in rm_to_gbu: + cbu_transformation = CBUAssemblyTransformation( + transforms=cbu_iri, + alignsTo=gc, + quaternionToRotate=rm_to_gbu[gc][1].combine(rm_to_gbu[gc][0]).as_quaternion_str(), + scaleFactorToAlignCoordinateCenter=scaling_factor_cbu1 if gc1.instance_iri in rm_to_gbu else scaling_factor_cbu2, + translationVectorToAlignOrigin=translation_vector_to_origin.as_str() + ) + cbu_assembly_transformation_lst.append(cbu_transformation) + cbu_binding_sites_transformation_dct.update(cbu_transformation.transformed_binding_sites) + + # calculate cavity (in terms of largest inner sphere diameter), outer diameter, and pore size diameter + inner_diameter_atom, inner_diameter, inner_volume = cap.largest_inner_sphere_diameter(adjusted_atoms) + outer_diameter = cap.outer_diameter(adjusted_atoms) + pore_sizes = [] + for pr in am.hasPoreRing: + pr: PoreRing + lst_of_points_for_probing_vector = [] + pair_gbus = pr.pair_of_ring_forming_gbus + for pair in pair_gbus.values(): + pair_of_binding_sites = Point.closest_pair_across_lists(cbu_binding_sites_transformation_dct[pair[0]], cbu_binding_sites_transformation_dct[pair[1]]) + lst_of_points_for_probing_vector.extend(pair_of_binding_sites) + probing_vector: Vector = Vector.from_two_points(start=Point(x=0, y=0, z=0), end=Point.centroid(lst_of_points_for_probing_vector)) + ps_val = cap.pore_size_diameter(adjusted_atoms, probing_vector) + pore_sizes.append( + PoreSize( + measuresPoreRing=pr, + hasProbingVector=probing_vector.as_str(), + hasPoreDiameter=om.Diameter(hasValue=om.Measure(hasNumericalValue=ps_val, hasUnit=om.angstrom)), + ) + ) + + # prepare the geometry file and upload + mop_iri = cls.init_instance_iri() + local_file_path = f"./data/xyz_mops_new/{am.rdfs_label}_{list(am.hasSymmetryPointGroup)[0]}___{mop_formula}___{mop_iri.split('/')[-1] if not bool(ccdc) else ccdc}.xyz" + mop_geo = ontospecies.Geometry.from_points(adjusted_atoms, local_file_path) + if upload_geometry: + # upload the geometry to the KG + if sparql_client is None: + raise ValueError('SPARQL client is required to upload the geometry') + else: + remote_file_path, timestamp_upload = sparql_client.upload_file(local_file_path) + mop_geo.hasGeometryFile = {remote_file_path} + + # release the blocked binding sites + for cbu in lst_cbu: + cbu.release_blocked_binding_sites() + + return cls( + instance_iri=mop_iri, + hasAssemblyModel=am, + hasChemicalBuildingUnit=lst_cbu, + hasProvenance=prov, + hasCharge=ontospecies.Charge(hasValue=om.Measure(hasNumericalValue=mop_charge, hasUnit=om.elementaryCharge)), + hasMolecularWeight=ontospecies.MolecularWeight(hasValue=om.Measure(hasNumericalValue=mop_mw, hasUnit=om.gramPerMole)), + hasMOPFormula=mop_formula, + hasCCDCNumber=ccdc, + hasGeometry=mop_geo, + hasCavity=Cavity(hasLargestInnerSphereDiameter=om.Diameter(hasValue=om.Measure(hasNumericalValue=inner_diameter, 
hasUnit=om.angstrom))), + hasOuterDiameter=om.Diameter(hasValue=om.Measure(hasNumericalValue=outer_diameter, hasUnit=om.angstrom)), + hasPoreSize=pore_sizes, + hasCBUAssemblyTransformation=cbu_assembly_transformation_lst + ) + + def visualise(self, sparql_client = None): + rows = [] + if list(self.hasGeometry)[0].hasPoints is None: + if sparql_client is None: + raise ValueError('SPARQL client is required to visualise/load the geometry') + list(self.hasGeometry)[0].load_xyz_from_geometry_file(sparql_client) + for pt in list(self.hasGeometry)[0].hasPoints: + rows.append([pt.label, pt.x, pt.y, pt.z]) + df = pd.DataFrame(rows, columns=['Atom', 'X', 'Y', 'Z',]) + fig = px.scatter_3d(df, x='X', y='Y', z='Z', color='Atom', title=f'MOP: {list(self.hasMOPFormula)[0]}\n AM: {list(self.hasAssemblyModel)[0].instance_iri}') + fig.update_traces(marker=dict(size=2)) + fig.update_layout(autosize=False, width=1200, height=400) + fig.show() + return fig diff --git a/MOPTools/twa_mops/ontomops_backup.sh b/MOPTools/twa_mops/ontomops_backup.sh new file mode 100755 index 00000000000..5c88f294a58 --- /dev/null +++ b/MOPTools/twa_mops/ontomops_backup.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Get the current date in YYYY-MM-DD format +current_date=$(date +"%Y-%m-%d") + +# Define the output file name using the current date +output_file="ontomops_backup_${current_date}.ttl" + +# Load the environment variables from the .env file to get the Blazegraph endpoint +if [ -f mops.env ]; then + source mops.env + echo "Environment variables loaded from mops.env file" +else + echo "mops.env file not found!" + exit 1 +fi + +# Execute the curl command and save the output to the specified file +curl -X POST --url "$SPARQL_ENDPOINT" \ + --data-urlencode 'query=CONSTRUCT { ?s ?p ?o } where { ?s ?p ?o }' \ + --header 'Accept: application/x-turtle' \ + > "$output_file" + +# Print a message indicating where the output was saved +echo "Backup saved to $output_file" diff --git a/MOPTools/twa_mops/ontospecies.py b/MOPTools/twa_mops/ontospecies.py new file mode 100644 index 00000000000..85fb8295a35 --- /dev/null +++ b/MOPTools/twa_mops/ontospecies.py @@ -0,0 +1,73 @@ +from __future__ import annotations +from typing import List, Optional +from twa.data_model.base_ontology import BaseOntology, BaseClass, ObjectProperty, DatatypeProperty +import om +import geo + +from rdkit.Chem import GetPeriodicTable +from rdkit.Chem.rdmolfiles import MolFromXYZFile + +# NOTE TODO this script is incomplete as it only contains necessary classes and properties for the MOPs project +# NOTE TODO a complete OGM representation for OntoSpecies is yet to be implemented +# NOTE TODO it should also be moved to a place that accessible to all other ontologies +class OntoSpecies(BaseOntology): + base_url = 'http://www.theworldavatar.com/ontology/ontospecies/OntoSpecies.owl#' + + +# object properties +HasMolecularWeight = ObjectProperty.create_from_base('HasMolecularWeight', OntoSpecies) +HasCharge = ObjectProperty.create_from_base('HasCharge', OntoSpecies) +HasGeometry = ObjectProperty.create_from_base('HasGeometry', OntoSpecies) + +# data properties +HasGeometryFile = DatatypeProperty.create_from_base('HasGeometryFile', OntoSpecies) + + +# classes +class Charge(BaseClass): + rdfs_isDefinedBy = OntoSpecies + hasValue: om.HasValue[om.Measure] + +class MolecularWeight(BaseClass): + rdfs_isDefinedBy = OntoSpecies + hasValue: om.HasValue[om.Measure] + + @classmethod + def from_xyz_file(cls, xyz_file: str) -> MolecularWeight: + periodic_table = GetPeriodicTable() + mol = 
MolFromXYZFile(xyz_file) + atoms = [a.GetSymbol() for a in mol.GetAtoms()] + molecular_weight = sum(periodic_table.GetAtomicWeight(atom) for atom in atoms) + return cls(hasValue=om.Measure(hasNumericalValue=molecular_weight, hasUnit=om.gramPerMole)) + +class Geometry(BaseClass): + rdfs_isDefinedBy = OntoSpecies + hasGeometryFile: HasGeometryFile[str] + hasPoints: Optional[List[geo.Point]] = None + + @property + def geometry_file(self) -> str: + return list(self.hasGeometryFile)[0] + + @classmethod + def from_points(cls, points: List[geo.Point], file_name: str) -> Geometry: + file_name = file_name + '.xyz' if not file_name.endswith('.xyz') else file_name + pts = [p for p in points if p.label.lower() not in ['x', 'center']] + with open(file_name, 'w') as f: + f.write(f'{len(pts)}\n\n') + for pt in pts: + # enforce normal decimal numbers to avoid scientific notation which breaks reading the xyz file using rdkit + f.write(f'{pt.label} {pt.x:.20f} {pt.y:.20f} {pt.z:.20f}\n') + return cls(hasGeometryFile=file_name, hasPoints=points) + + def load_xyz_from_geometry_file(self, sparql_client): + lst_pt = [] + remote_file_path = list(self.hasGeometryFile)[0] + downloaded_file_path = remote_file_path.split('/')[-1] + sparql_client.download_file(remote_file_path, downloaded_file_path) + mol = MolFromXYZFile(downloaded_file_path) + for a in mol.GetAtoms(): + pos = mol.GetConformer().GetAtomPosition(a.GetIdx()) + pt = geo.Point(x=pos.x, y=pos.y, z=pos.z, label=a.GetSymbol()) + lst_pt.append(pt) + self.hasPoints = lst_pt