From 2033b869323d7f71f1973804c40c300c78c27da8 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Tue, 6 Dec 2022 19:29:20 +0000 Subject: [PATCH] Add symmetry fields to structure model Add new space group fields and outline tests Use constr for pydantic v1 instead of field regexp Remove space from regexp Fix outstanding discrepancies with specification Update test with new number of properties --- openapi/openapi.json | 76 ++++++++++++++++ optimade/models/structures.py | 167 +++++++++++++++++++++++++++++++++- optimade/models/utils.py | 6 ++ tests/models/test_utils.py | 10 ++ tests/server/test_client.py | 2 +- 5 files changed, 259 insertions(+), 2 deletions(-) diff --git a/openapi/openapi.json b/openapi/openapi.json index c5f1a39e2..13e9fada3 100644 --- a/openapi/openapi.json +++ b/openapi/openapi.json @@ -4346,6 +4346,82 @@ "x-optimade-support": "should", "x-optimade-unit": "\u00c5" }, + "space_group_symmetry_operations_xyz": { + "anyOf": [ + { + "items": { + "type": "string", + "pattern": "^([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?),([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?),([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?)$" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Space Group Symmetry Operations Xyz", + "description": "A list of symmetry operations given as general position x, y and z coordinates in algebraic form.\n\nEach symmetry operation is described by a string that gives that symmetry operation in Jones' faithful representation (Bradley & Cracknell, 1972: pp. 
35-37), adapted for computer string notation.\nThe letters x, y and z that are typesetted with overbars in printed text represent coordinate values multiplied by -1 and are encoded as -x, -y and -z, respectively.\nThe syntax of the strings representing symmetry operations MUST conform to regular expressions given in appendix The Symmetry Operation String Regular Expressions.\nThe interpretation of the strings MUST follow the conventions of the IUCr CIF core dictionary (IUCr, 2023).\nIn particular, this property MUST explicitly provide all symmetry operations needed to generate all the atoms in the unit cell from the atoms in the asymmetric unit, for the setting used.\nThis symmetry operation set MUST always include the `\"x,y,z\"` identity operation.\nThe symmetry operations are to be applied to fractional atom coordinates.\nIn case only Cartesian coordinates are available, these Cartesian coordinates must be converted to fractional coordinates before the application of the provided symmetry operations.\nIf the symmetry operation list is present, it MUST be compatible with other space group specifications (e.g. the ITC space group number, the Hall symbol, the Hermann-Mauguin symbol) if these are present.\n\n- **Type**: list of strings\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n\n - The property is RECOMMENDED if coordinates are returned in a form to which these operations can or must be applied (e.g. 
fractional atom coordinates of an asymmetric unit).\n - The property is REQUIRED if symmetry operations are necessary to reconstruct the full model of the material and no other symmetry information (e.g., the Hall symbol) is provided that would allow the user to derive symmetry operations unambiguously.\n - MUST be null if `nperiodic_dimensions` is equal to 0.\n\n- **Examples**:\n\n - Space group operations for the space group with ITC number 3 (H-M symbol `P 2`, extended H-M symbol `P 1 2 1`, Hall symbol `P 2y`): `[\"x,y,z\", \"-x,y,-z\"]`\n - Space group operations for the space group with ITC number 5 (H-M symbol `C 2`, extended H-M symbol `C 1 2 1`, Hall symbol `C 2y`): `[\"x,y,z\", \"-x,y,-z\", \"x+1/2,y+1/2,z\", \"-x+1/2,y+1/2,-z\"]`\n\n- **Notes**:\n The list of space group symmetry operations applies to the whole periodic array of atoms and together with the lattice translations given in the `lattice_vectors` property provides the necessary information to reconstruct all atom site positions of the periodic material.\n Thus, the symmetry operations described in this property are only applicable to material models with at least one periodic dimension.\n This property is not meant to represent arbitrary symmetries of molecules, non-periodic (finite) collections of atoms or non-crystallographic symmetry.\n\n- **Bibliographic References**:\n\n Bradley, C. J. and Cracknell, A. P. (1972) The Mathematical Theory of Symmetry in Solids. Oxford, Clarendon Press (paperback edition 2010) 745 p. ISBN 978-0-19-958258-7.\n\n IUCr (2023) Core dictionary (coreCIF) version 2.4.5; data name _space_group_symop_operation_xyz. 
Available from: https://www.iucr.org/__data/iucr/cifdic_html/1/cif_core.dic/Ispace_group_symop_operation_xyz.html [Accessed 2023-06-18T16:46+03:00].", + "x-optimade-queryable": "optional", + "x-optimade-support": "optional" + }, + "space_group_symbol_hall": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Space Group Symbol Hall", + "description": "A Hall space group symbol representing the symmetry of the structure as defined in (Hall, 1981, 1981a).\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The change-of-basis operations are used as defined in the International Tables of Crystallography (ITC) Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).\n - Each component of the Hall symbol MUST be separated by a single space symbol.\n - If there exists a standard Hall symbol which represents the symmetry it SHOULD be used.\n - MUST be null if `nperiodic_dimensions` is not equal to 3.\n\n- **Examples**:\n\n - Space group symbols with explicit origin (the Hall symbols):\n\n - `P 2c -2ac`\n - `-I 4bd 2ab 3`\n\n - Space group symbols with change-of-basis operations:\n\n - `P 2yb (-1/2*x+z,1/2*x,y)`\n - `-I 4 2 (1/2*x+1/2*y,-1/2*x+1/2*y,z)`\n\n- **Bibliographic References**:\n\n Hall, S. R. (1981) Space-group notation with an explicit origin. Acta Crystallographica Section A, 37, 517-525, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001228\n\n Hall, S. R. (1981a) Space-group notation with an explicit origin; erratum. Acta Crystallographica Section A, 37, 921-921, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001976\n\n IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. 
Dordrecht/Boston/London, Kluwer Academic Publishers.", + "x-optimade-queryable": "optional", + "x-optimade-support": "optional" + }, + "space_group_symbol_hermann_mauguin": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Space Group Symbol Hermann Mauguin", + "description": "A human- and machine-readable string containing the short Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The H-M symbol SHOULD aim to convey the closest representation of the symmetry information that can be specified using the short format used in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1 as described in the accompanying text.\n - The symbol MAY be a non-standard short H-M symbol.\n - The H-M symbol does not unambiguously communicate the axis, cell, and origin choice, and the given symbol SHOULD NOT be amended to convey this information.\n - To encode as character strings, the following adaptations MUST be made when representing H-M symbols given in their typesetted form:\n\n - the overbar above the numbers MUST be changed to the minus sign in front of the digit (e.g. '-2');\n - subscripts that denote screw axes are written as digits immediately after the axis designator without a space (e.g. 'P 32')\n - the space group generators MUST be separated by a single space (e.g. 'P 21 21 2');\n - there MUST be no spaces in the space group generator designation (i.e. use 'P 21/m', not the 'P 21 / m');\n\n- **Examples**:\n - `C 2`\n - `P 21 21 21`\n\n- **Bibliographic References**:\n\n IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. 
Dordrecht, Springer.", + "x-optimade-queryable": "optional", + "x-optimade-support": "optional" + }, + "space_group_symbol_hermann_mauguin_extended": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Space Group Symbol Hermann Mauguin Extended", + "description": "A human- and machine-readable string containing the extended Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.\n\n- **Type**: string\n- **Requirements/Conventions**:\n\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The H-M symbols SHOULD be given as specified in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1.\n - The change-of-basis operation SHOULD be provided for the non-standard axis and cell choices.\n - The extended H-M symbol does not unambiguously communicate the origin choice, and the given symbol SHOULD NOT be amended to convey this information.\n - The description of the change-of-basis SHOULD follow conventions of the ITC Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).\n - The same character string encoding conventions MUST be used as for the specification of the `space_group_symbol_hermann_mauguin` property.\n\n- **Examples**:\n\n - `C 1 2 1`\n\n- **Bibliographic References**:\n\n IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.", + "x-optimade-queryable": "optional", + "x-optimade-support": "optional" + }, + "space_group_it_number": { + "anyOf": [ + { + "type": "integer", + "maximum": 230.0, + "minimum": 1.0 + }, + { + "type": "null" + } + ], + "title": "Space Group It Number", + "description": "Space group number for the structure assigned by the International Tables for Crystallography Vol. 
A.\n- **Type**: integer\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The integer value MUST be between 1 and 230.\n - MUST be null if `nperiodic_dimensions` is not equal to 3.", + "x-optimade-queryable": "optional", + "x-optimade-support": "optional" + }, "nsites": { "anyOf": [ { diff --git a/optimade/models/structures.py b/optimade/models/structures.py index f9ba8aa6e..3567c711d 100644 --- a/optimade/models/structures.py +++ b/optimade/models/structures.py @@ -3,7 +3,15 @@ from enum import Enum, IntEnum from typing import TYPE_CHECKING, Annotated, Literal, Optional, Union -from pydantic import BaseModel, BeforeValidator, Field, field_validator, model_validator +from pydantic import ( + BaseModel, + BeforeValidator, + Field, + conint, + constr, + field_validator, + model_validator, +) from optimade.models.entries import EntryResource, EntryResourceAttributes from optimade.models.types import ChemicalSymbol @@ -11,6 +19,7 @@ ANONYMOUS_ELEMENTS, CHEMICAL_FORMULA_REGEXP, CHEMICAL_SYMBOLS, + SPACE_GROUP_SYMMETRY_OPERATION_REGEX, OptimadeField, StrictField, SupportLevel, @@ -43,6 +52,7 @@ list[Optional[Annotated[float, BeforeValidator(float)]]], Field(min_length=3, max_length=3), ] +SymOp = constr(pattern=SPACE_GROUP_SYMMETRY_OPERATION_REGEX) class Periodicity(IntEnum): @@ -587,6 +597,154 @@ class StructureResourceAttributes(EntryResourceAttributes): ), ] = None + space_group_symmetry_operations_xyz: Optional[list[SymOp]] = OptimadeField( # type: ignore[valid-type] + None, + description="""A list of symmetry operations given as general position x, y and z coordinates in algebraic form. + +Each symmetry operation is described by a string that gives that symmetry operation in Jones' faithful representation (Bradley & Cracknell, 1972: pp. 35-37), adapted for computer string notation. 
+The letters x, y and z that are typesetted with overbars in printed text represent coordinate values multiplied by -1 and are encoded as -x, -y and -z, respectively. +The syntax of the strings representing symmetry operations MUST conform to regular expressions given in appendix The Symmetry Operation String Regular Expressions. +The interpretation of the strings MUST follow the conventions of the IUCr CIF core dictionary (IUCr, 2023). +In particular, this property MUST explicitly provide all symmetry operations needed to generate all the atoms in the unit cell from the atoms in the asymmetric unit, for the setting used. +This symmetry operation set MUST always include the `"x,y,z"` identity operation. +The symmetry operations are to be applied to fractional atom coordinates. +In case only Cartesian coordinates are available, these Cartesian coordinates must be converted to fractional coordinates before the application of the provided symmetry operations. +If the symmetry operation list is present, it MUST be compatible with other space group specifications (e.g. the ITC space group number, the Hall symbol, the Hermann-Mauguin symbol) if these are present. + +- **Type**: list of strings + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`. + + - The property is RECOMMENDED if coordinates are returned in a form to which these operations can or must be applied (e.g. fractional atom coordinates of an asymmetric unit). + - The property is REQUIRED if symmetry operations are necessary to reconstruct the full model of the material and no other symmetry information (e.g., the Hall symbol) is provided that would allow the user to derive symmetry operations unambiguously. + - MUST be null if `nperiodic_dimensions` is equal to 0. 
+ +- **Examples**: + + - Space group operations for the space group with ITC number 3 (H-M symbol `P 2`, extended H-M symbol `P 1 2 1`, Hall symbol `P 2y`): `["x,y,z", "-x,y,-z"]` + - Space group operations for the space group with ITC number 5 (H-M symbol `C 2`, extended H-M symbol `C 1 2 1`, Hall symbol `C 2y`): `["x,y,z", "-x,y,-z", "x+1/2,y+1/2,z", "-x+1/2,y+1/2,-z"]` + +- **Notes**: + The list of space group symmetry operations applies to the whole periodic array of atoms and together with the lattice translations given in the `lattice_vectors` property provides the necessary information to reconstruct all atom site positions of the periodic material. + Thus, the symmetry operations described in this property are only applicable to material models with at least one periodic dimension. + This property is not meant to represent arbitrary symmetries of molecules, non-periodic (finite) collections of atoms or non-crystallographic symmetry. + +- **Bibliographic References**: + + Bradley, C. J. and Cracknell, A. P. (1972) The Mathematical Theory of Symmetry in Solids. Oxford, Clarendon Press (paperback edition 2010) 745 p. ISBN 978-0-19-958258-7. + + IUCr (2023) Core dictionary (coreCIF) version 2.4.5; data name _space_group_symop_operation_xyz. Available from: https://www.iucr.org/__data/iucr/cifdic_html/1/cif_core.dic/Ispace_group_symop_operation_xyz.html [Accessed 2023-06-18T16:46+03:00].""", + support=SupportLevel.OPTIONAL, + queryable=SupportLevel.OPTIONAL, + ) + + space_group_symbol_hall: Optional[str] = OptimadeField( + None, + description="""A Hall space group symbol representing the symmetry of the structure as defined in (Hall, 1981, 1981a). + +- **Type**: string + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`. + - **Query**: Support for queries on this property is OPTIONAL. + - The change-of-basis operations are used as defined in the International Tables of Crystallography (ITC) Vol. B, Sect. 
1.4, Appendix A1.4.2 (IUCr, 2001). + - Each component of the Hall symbol MUST be separated by a single space symbol. + - If there exists a standard Hall symbol which represents the symmetry it SHOULD be used. + - MUST be null if `nperiodic_dimensions` is not equal to 3. + +- **Examples**: + + - Space group symbols with explicit origin (the Hall symbols): + + - `P 2c -2ac` + - `-I 4bd 2ab 3` + + - Space group symbols with change-of-basis operations: + + - `P 2yb (-1/2*x+z,1/2*x,y)` + - `-I 4 2 (1/2*x+1/2*y,-1/2*x+1/2*y,z)` + +- **Bibliographic References**: + + Hall, S. R. (1981) Space-group notation with an explicit origin. Acta Crystallographica Section A, 37, 517-525, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001228 + + Hall, S. R. (1981a) Space-group notation with an explicit origin; erratum. Acta Crystallographica Section A, 37, 921-921, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001976 + + IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.""", + support=SupportLevel.OPTIONAL, + queryable=SupportLevel.OPTIONAL, + ) + + space_group_symbol_hermann_mauguin: Optional[str] = OptimadeField( + None, + description="""A human- and machine-readable string containing the short Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response. +- **Type**: string + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`. + - **Query**: Support for queries on this property is OPTIONAL. + - The H-M symbol SHOULD aim to convey the closest representation of the symmetry information that can be specified using the short format used in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1 as described in the accompanying text. 
+ - The symbol MAY be a non-standard short H-M symbol. + - The H-M symbol does not unambiguously communicate the axis, cell, and origin choice, and the given symbol SHOULD NOT be amended to convey this information. + - To encode as character strings, the following adaptations MUST be made when representing H-M symbols given in their typesetted form: + + - the overbar above the numbers MUST be changed to the minus sign in front of the digit (e.g. '-2'); + - subscripts that denote screw axes are written as digits immediately after the axis designator without a space (e.g. 'P 32') + - the space group generators MUST be separated by a single space (e.g. 'P 21 21 2'); + - there MUST be no spaces in the space group generator designation (i.e. use 'P 21/m', not the 'P 21 / m'); + +- **Examples**: + - `C 2` + - `P 21 21 21` + +- **Bibliographic References**: + + IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. Dordrecht, Springer.""", + support=SupportLevel.OPTIONAL, + queryable=SupportLevel.OPTIONAL, + ) + + space_group_symbol_hermann_mauguin_extended: Optional[str] = OptimadeField( + None, + description="""A human- and machine-readable string containing the extended Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response. + +- **Type**: string +- **Requirements/Conventions**: + + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`. + - **Query**: Support for queries on this property is OPTIONAL. + - The H-M symbols SHOULD be given as specified in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1. + - The change-of-basis operation SHOULD be provided for the non-standard axis and cell choices. + - The extended H-M symbol does not unambiguously communicate the origin choice, and the given symbol SHOULD NOT be amended to convey this information. 
+ - The description of the change-of-basis SHOULD follow conventions of the ITC Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001). + - The same character string encoding conventions MUST be used as for the specification of the `space_group_symbol_hermann_mauguin` property. + +- **Examples**: + + - `C 1 2 1` + +- **Bibliographic References**: + + IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.""", + support=SupportLevel.OPTIONAL, + queryable=SupportLevel.OPTIONAL, + ) + + space_group_it_number: Optional[conint(ge=1, le=230)] = OptimadeField( # type: ignore[valid-type] + None, + description="""Space group number for the structure assigned by the International Tables for Crystallography Vol. A. +- **Type**: integer + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`. + - **Query**: Support for queries on this property is OPTIONAL. + - The integer value MUST be between 1 and 230. 
+ - MUST be null if `nperiodic_dimensions` is not equal to 3.""",
+        support=SupportLevel.OPTIONAL,
+        queryable=SupportLevel.OPTIONAL,
+    )
+
     nsites: Annotated[
         Optional[int],
         OptimadeField(
@@ -960,6 +1118,29 @@ def elements_must_be_alphabetical(
             raise ValueError(f"elements must be sorted alphabetically, but is: {value}")
         return value
 
+    @field_validator("space_group_symbol_hall", "space_group_it_number", mode="after")
+    @classmethod
+    def check_space_group_vs_nperiodic_dimensions(cls, value, info):
+        """Require the space group symbol/number to be null unless the structure is 3D-periodic.
+
+        Both fields are documented as "MUST be null if `nperiodic_dimensions`
+        is not equal to 3".
+
+        Raises:
+            ValueError: If a value is given while `nperiodic_dimensions` is set
+                to anything other than 3.
+        """
+        # `info.data` only contains fields validated before this one: this
+        # assumes `nperiodic_dimensions` is declared earlier in the model
+        # (TODO confirm); if it is absent or null the check is skipped.
+        nperiodic_dimensions = info.data.get("nperiodic_dimensions")
+        if value is not None and nperiodic_dimensions not in (None, 3):
+            raise ValueError(
+                f"{info.field_name} MUST be null if nperiodic_dimensions is not "
+                f"equal to 3, but nperiodic_dimensions is {nperiodic_dimensions}"
+            )
+        return value
+
     @field_validator("elements_ratios", mode="after")
     @classmethod
     def ratios_must_sum_to_one(
diff --git a/optimade/models/utils.py b/optimade/models/utils.py
index 50349e3d5..a811d4a1e 100644
--- a/optimade/models/utils.py
+++ b/optimade/models/utils.py
@@ -365,3 +365,11 @@ def reduce_formula(formula: str) -> str:
 )
 
 ELEMENT_SYMBOLS_PATTERN = "(" + "|".join(CHEMICAL_SYMBOLS) + ")"
+
+# Fragments of the regular expression for one symmetry operation component; the
+# spaces inside them are for readability only and are stripped before use.
+translation = "1/2|[12]/3|[1-3]/4|[1-5]/6"
+translation_appended = f"[-+]? [xyz] ([-+][xyz])? ([-+] ({translation}) )?"
+translation_prepended = f"[-+]? ({translation}) ([-+] [xyz] ([-+][xyz])? )?"
+symop = f"({translation_appended}|{translation_prepended})".replace(" ", "")
+SPACE_GROUP_SYMMETRY_OPERATION_REGEX = f"^{symop},{symop},{symop}$"
diff --git a/tests/models/test_utils.py b/tests/models/test_utils.py
index df8f09dcc..e111dd1e8 100644
--- a/tests/models/test_utils.py
+++ b/tests/models/test_utils.py
@@ -1,3 +1,4 @@
+import re
 from typing import Callable
 
 import pytest
@@ -159,3 +160,32 @@ def test_anonymize_formula():
     assert anonymize_formula("Si1 O2") == "A2B"
     assert anonymize_formula("Si11 O2") == "A11B2"
     assert anonymize_formula("Si10 O2C4") == "A5B2C"
+
+
+@pytest.mark.parametrize(
+    "symops",
+    [
+        "x,y,z",
+        "-x,y,-z",
+        "x+1/2,y+1/2,z",
+        "-x+1/2,y+1/2,-z",
+        "x-y,x,z+5/6",
+        "1/2-x,y,z",
+    ],
+)
+def test_symop_regex(symops):
+    """Well-formed symmetry operation strings are accepted by the regex."""
+    from optimade.models.utils import SPACE_GROUP_SYMMETRY_OPERATION_REGEX
+
+    assert re.match(SPACE_GROUP_SYMMETRY_OPERATION_REGEX, symops)
+
+
+@pytest.mark.parametrize(
+    "symops",
+    ["", "x,y", "x, y, z", "a,b,c", "x+1/5,y,z", "x,y,z,"],
+)
+def test_symop_regex_rejects_invalid(symops):
+    """Malformed symmetry operation strings are rejected by the regex."""
+    from optimade.models.utils import SPACE_GROUP_SYMMETRY_OPERATION_REGEX
+
+    assert re.match(SPACE_GROUP_SYMMETRY_OPERATION_REGEX, symops) is None
diff --git a/tests/server/test_client.py b/tests/server/test_client.py
index 523567b4e..fa2c82016 100644
--- a/tests/server/test_client.py
+++ b/tests/server/test_client.py
@@ -511,7 +511,7 @@ def test_list_properties(
 
     results = cli.list_properties("structures")
     for database in results:
-        assert len(results[database]) == 22, str(results[database])
+        assert len(results[database]) == 27, str(results[database])
 
     results = cli.search_property("structures", "site")
     for database in results: