diff --git a/.github/workflows/actions.yaml b/.github/workflows/actions.yaml deleted file mode 100644 index 632f51b..0000000 --- a/.github/workflows/actions.yaml +++ /dev/null @@ -1,55 +0,0 @@ -name: install-and-test-workflow -on: [push] -jobs: - install-and-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install dependencies - run: | - pip install --upgrade pip - pip install '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple - pip install coverage coveralls - - name: Test with pytest - run: | - python -m coverage run -m pytest -sv - - name: Submit to coveralls - continue-on-error: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - coveralls --service=github - build-and-install: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Build the package - run: | - pip install --upgrade pip - pip install build - python -m build --sdist - - name: Install the package - run: | - pip install dist/*.tar.gz --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple - ruff-formatting: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 - with: - args: "format ." - ruff-linting: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 - with: - args: "format . --check" diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml new file mode 100644 index 0000000..2bc3158 --- /dev/null +++ b/.github/workflows/actions.yml @@ -0,0 +1,74 @@ +name: install-and-test +on: [push] + +# https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs +# `contents` is for permission to the contents of the repository. 
+# `pull-requests` is for permission to pull request +permissions: + contents: write + checks: write + pull-requests: write + +jobs: + install-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: 3.9 + - name: Install dependencies + run: | + pip install --upgrade pip + pip install uv + uv pip install -e '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple --system + uv pip install coveralls --system + - name: mypy + run: | + python -m mypy --ignore-missing-imports --follow-imports=silent --no-strict-optional src/nomad_parser_magres/schema_packages src/nomad_parser_magres/parsers tests + - name: Build coverage file + run: | + pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=src tests | tee pytest-coverage.txt + - name: Pytest coverage comment + uses: MishaKav/pytest-coverage-comment@main + with: + pytest-coverage-path: pytest-coverage.txt + junitxml-path: pytest.xml + - name: Submit to coveralls + continue-on-error: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + coveralls --service=github + build-and-install: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: 3.9 + - name: Build the package + run: | + pip install uv + uv pip install --upgrade pip --system + uv pip install build --system + python -m build --sdist + - name: Install the package + run: | + uv pip install dist/*.tar.gz --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple --system + ruff-linting: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + with: + args: "check ." + ruff-formatting: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + with: + args: "format . 
--check --verbose" diff --git a/.github/workflows/mkdocs-deploy.yml b/.github/workflows/mkdocs-deploy.yml index a6c4d41..c46ff23 100644 --- a/.github/workflows/mkdocs-deploy.yml +++ b/.github/workflows/mkdocs-deploy.yml @@ -4,6 +4,9 @@ on: push: branches: - main # Triggers deployment on push to the main branch + +permissions: + contents: write jobs: deploy: @@ -13,16 +16,9 @@ jobs: - name: Checkout Repository uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.x" - - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install mkdocs mkdocs-material==8.1.1 pymdown-extensions mkdocs-click - - - name: Build and Deploy - run: | - mkdocs gh-deploy --force --remote-branch gh-pages + - name: Deploy docs + uses: mhausenblas/mkdocs-deploy-gh-pages@master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CONFIG_FILE: mkdocs.yml + REQUIREMENTS: requirements_docs.txt diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f7fcff6..d4e4675 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,84 +1,43 @@ -# Upload python package to pypi server and github release. -# Reference: https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries -name: Upload Python Package +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Build and Publish Python Package on: release: types: [published] +permissions: + contents: read + jobs: - # publish-to-pypi: - # name: >- - # Publish distribution to PyPI - # runs-on: ubuntu-latest - # environment: - # name: pypi - # url: https://pypi.org/p/nomad-schema-plugin-example - # permissions: - # id-token: write # IMPORTANT: mandatory for trusted publishing - # - # steps: - # - uses: actions/checkout@v4 - # - name: Set up Python - # uses: actions/setup-python@v4 - # with: - # python-version: "3.9" - # - name: Install pypa/build - # run: >- - # python3 -m - # pip install - # build - # --user - # - name: Build a binary wheel and a source tarball - # run: python3 -m build - # - name: Publish distribution to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 + deploy: - github-release: - name: >- - Sign the Python distribution with Sigstore - and upload them to GitHub Release runs-on: ubuntu-latest - permissions: - contents: write # IMPORTANT: mandatory for making GitHub Releases - id-token: write # IMPORTANT: mandatory for sigstore steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.9" - - name: Install pypa/build - run: >- - python3 -m - pip install - build - --user - - name: Build a binary wheel and a source tarball - run: python3 -m build - - name: Sign the dists with Sigstore - uses: sigstore/gh-action-sigstore-python@v1.2.3 - with: - inputs: >- - ./dist/*.tar.gz - ./dist/*.whl - - name: Create GitHub Release - env: - GITHUB_TOKEN: ${{ github.token }} - run: >- - gh release create - '${{ github.ref_name }}' - --repo '${{ github.repository }}' - --notes "" - - name: Upload artifact signatures to GitHub Release - env: - GITHUB_TOKEN: ${{ github.token }} - # Upload to GitHub Release using the `gh` CLI. - # `dist/` contains the built packages, and the - # sigstore-produced signatures and certificates. 
- run: >- - gh release upload - '${{ github.ref_name }}' dist/** - --repo '${{ github.repository }}' + - uses: actions/checkout@v3 + + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install build + + - name: Build package + run: python -m build + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3a3647d..f2074f9 100644 --- a/.gitignore +++ b/.gitignore @@ -20,12 +20,12 @@ parts/ sdist/ var/ wheels/ -pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST +_version.py # PyInstaller # Usually these files are written by a python script from a template @@ -46,10 +46,12 @@ htmlcov/ .cache nosetests.xml coverage.xml +coverage.txt *.cover *.py,cover .hypothesis/ .pytest_cache/ +cover/ # Translations *.mo @@ -72,6 +74,7 @@ instance/ docs/_build/ # PyBuilder +.pybuilder/ target/ # Jupyter Notebook @@ -82,7 +85,9 @@ profile_default/ ipython_config.py # pyenv -.python-version +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. @@ -91,7 +96,22 @@ ipython_config.py # install all needed dependencies. #Pipfile.lock -# PEP 582; used by e.g. github.com/David-OConnor/pyflow +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff @@ -104,12 +124,12 @@ celerybeat.pid # Environments .env .venv +.pyenv env/ venv/ ENV/ env.bak/ venv.bak/ -.pyenv # Spyder project settings .spyderproject @@ -129,5 +149,25 @@ dmypy.json # Pyre type checker .pyre/ -# VSCode launch json +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# VSCode settings .vscode/launch.json + +# comments scripts +comments.py +todos* +questions* +warnings* +important* \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index a78624b..0c8648f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,24 +1,21 @@ { - "python.defaultInterpreterPath": ".pyenv/bin/python", - "python.terminal.activateEnvInCurrentTerminal": true, - "editor.rulers": [90], - "editor.renderWhitespace": "all", - "editor.tabSize": 4, - "[javascript]": { - "editor.tabSize": 2 + "editor.rulers": [ + 90 + ], + "editor.renderWhitespace": "all", + "editor.tabSize": 4, + "files.trimTrailingWhitespace": true, + "[python]": { + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports": "explicit" }, - "files.trimTrailingWhitespace": true, - "files.watcherExclude": { - "${workspaceFolder}/.pyenv/**": true - }, - "files.exclude": { - "\"**/*.pyc\": {\"when\": \"$(basename).py\"}": true, - "**/__pycache__": true, - "**/node_modules": true - }, - "python.testing.pytestPath": "pytest", - "python.testing.pytestArgs": ["tests"], - "python.testing.unittestEnabled": false, - "editor.defaultFormatter": "charliermarsh.ruff", - "editor.formatOnSave": true -} \ No newline at end of file + "editor.defaultFormatter": "charliermarsh.ruff" + }, + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/LICENSE b/LICENSE index 261eeb9..427417b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -178,7 +179,7 @@ APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" + boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a @@ -186,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index c1e4786..48b802e 100644 --- a/README.md +++ b/README.md @@ -1,128 +1,159 @@ -![](https://github.com/nomad-coe/nomad-parser-magres/actions/workflows/actions.yml/badge.svg) -![](https://coveralls.io/repos/github/nomad-coe/nomad-parser-magres/badge.svg?branch=main) -[![NOMAD](https://img.shields.io/badge/Open%20NOMAD-lightgray?logo=)](https://nomad-lab.eu/prod/v1/staging/gui/) +# `nomad-parser-magres` -# NOMAD's parser for magres - -This is a parser plugin for the [magres file format](https://www.ccpnc.ac.uk/output/magres) developed in the CCP-NC for Nuclear Magnetic Resonance (NMR) data. +This is a plugin for [NOMAD](https://nomad-lab.eu) that contains the parser and extended section definitions for the [magres](https://www.ccpnc.ac.uk/docs/magres) file format. The parser uses the [`nomad-simulations`](https://github.com/nomad-coe/nomad-simulations) schema as the basis of its schema. ## Getting started -### Install the dependencies +`nomad-parser-magres` can be installed as a PyPI package using `pip`. 
We require features from the `nomad-lab` package that are not publicly available on PyPI, so an extra flag `--index-url` needs to be specified when pip installing this package: +```sh +pip install nomad-parser-magres --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple +``` + +## Development -Clone the project and, in the workspace folder, create a virtual environment (note this project uses Python 3.9): +If you want to develop this package locally, clone the project and, in the workspace folder, create a virtual environment (this project uses Python 3.9 or 3.10): ```sh -git clone https://github.com/nomad-coe/nomad-parser-magres.git +git clone https://github.com/CCP-NC/nomad-parser-magres.git cd nomad-parser-magres -python3.9 -m venv .pyenv -source .pyenv/bin/activate +python3.10 -m venv .pyenv +. .pyenv/bin/activate ``` -Install the `nomad-lab` package: +Make sure to have `pip` upgraded: ```sh pip install --upgrade pip -pip install '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple ``` -> **Note**: Until we have an official PyPI NOMAD release with the plugin functionality, make -sure to include NOMAD's internal package registry (via `--index-url` in the above command). - - -### Run the tests - -You can run the unit testing using the `pytest` package: +We recommend installing `uv` for fast pip installation of the packages: +```sh +pip install uv +``` +Install the `nomad-lab` package: ```sh -python -m pytest -sv +uv pip install '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple ``` -We recommend to install the `coverage` and `coveralls` packages for a more comprehensive output of the testing: +**Note!** +Until we have an official PyPI NOMAD release with the plugin functionality, make +sure to include NOMAD's internal package registry (via `--index-url` in the above command). + +The plugin is still under development. 
If you would like to contribute, install the package in editable mode (with the added `-e` flag): ```sh -pip install coverage coveralls -python -m coverage run -m pytest -sv +uv pip install -e '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple ``` -## Development -The plugin is still under development. If you would like to contribute, install the package in editable mode (with the added `-e` flag) with the development dependencies: +### Run the tests +You can run the tests locally: ```sh -pip install -e .[dev] --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple +python -m pytest -sv tests ``` +where `-s` disables output capturing and `-v` increases the output verbosity. -### Setting up the plugin on your local installation -Read the [NOMAD plugin documentation](https://nomad-lab.eu/prod/v1/staging/docs/howto/oasis/plugins_install.html) for all details on how to deploy the plugin on your local NOMAD installation. - -To deploy the plugin in your local NOMAD installation, follow the next steps: - -1. Add the configurations related to NOMAD. This is already implemented through the plugin definition file ```src/nomad_parser_magres/nomad_plugin.yaml```: - - ```yaml - plugin_type: parser - name: parsers/magres - description: | - This plugin is used to parsed magres files into the NOMAD schema. - ``` - and the ```nomad.yaml``` configuration file: - - ```yaml - normalize: - normalizers: - include: - - MetainfoNormalizer - plugins: - # We only include our schema here. Without the explicit include, all plugins will be - # loaded. Many build in plugins require more dependencies. Install nomad-lab[parsing] - # to make all default plugins work. - include: - - 'parsers/magres' - - 'runschema' - - 'simulationworkflowschema' - options: - parsers/magres: - python_package: nomad_parser_magres - runschema: - python_package: runschema - simulationworkflowschema: - python_package: simulationworkflowschema - ``` -3. 
Add to your local NOMAD installation the same lines of your plugin ```nomad.yaml``` file. -4. Add to your local NOMAD installation environment the `PYTHONPATH` to your plugin. This can be done either by running the following command every time you start a new terminal for running the appworker, or by adding it to your virtual environment in the `/.pyenv/bin/activate` file: - - ```sh - export PYTHONPATH="$PYTHONPATH:/src" - ``` - -If you are working in this repository, you just need to activate the environment to start working using the ```nomad-parser-magres``` package locally in your own Python scripts. +Our CI/CD pipeline produces a more comprehensive test report using the `pytest-cov` package. You can generate a local coverage report: +```sh +uv pip install pytest-cov +python -m pytest --cov=src tests +``` ### Run linting and auto-formatting -Ruff auto-formatting is also a part of the GitHub workflow actions. Make sure that before you make a Pull Request to add your contributions to this repo, the following commands run in your local without any errors otherwise the workflow action will fail. + +We use [Ruff](https://docs.astral.sh/ruff/) for linting and formatting the code. Ruff auto-formatting is also a part of the GitHub workflow actions. You can run both checks locally: ```sh ruff check . -``` -```sh ruff format . --check ``` -Alternatively, if you are using VSCode as your IDE, we added the settings configuration file, `.vscode/settings.json`, such that it performs `ruff format` whenever you save progress in a file. +### Debugging + +For interactive debugging of the tests, use `pytest` with the `--pdb` flag. We recommend using an IDE for debugging, e.g., _VSCode_. 
If that is the case, add the following snippet to your `.vscode/launch.json`: +```json +{ + "configurations": [ + { + "name": "<descriptive tag>", + "type": "debugpy", + "request": "launch", + "cwd": "${workspaceFolder}", + "program": "${workspaceFolder}/.pyenv/bin/pytest", + "justMyCode": true, + "env": { + "_PYTEST_RAISE": "1" + }, + "args": [ + "-sv", + "--pdb", + "<path-to-plugin-tests>", + ] + } + ] +} +``` + +where `<path-to-plugin-tests>` must be changed to the local path to the test module to be debugged. + +The settings configuration file `.vscode/settings.json` automatically applies the linting and formatting upon saving the modified file. - diff --git a/mkdocs.yml b/mkdocs.yml index 0352397..dbeac1e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,10 +1,20 @@ -site_name: Documentation -site_description: | - The documentation for the NOMAD parser for magres files. -site_author: The NOMAD Authors -repo_url: https://github.com/nomad-coe/nomad-parser-magres +site_name: nomad-parser-magres +site_description: NOMAD parser plugin for the magres file format. +site_author: CCP-NC + +repo_url: https://github.com/CCP-NC/nomad-parser-magres +edit_uri: "" + nav: - Home: index.md + - Tutorial: tutorial/tutorial.md + - How-to guides: + - Install this Plugin: how_to/install_this_plugin.md + - Use this Plugin: how_to/use_this_plugin.md + - Contribute to this plugin: how_to/contribute_to_this_plugin.md + - Contribute to the documentation: how_to/contribute_to_the_documentation.md + - Explanation: explanation/explanation.md + - Reference: reference/references.md plugins: - search theme: @@ -19,6 +29,8 @@ theme: features: - navigation.instant custom_dir: docs/theme + icon: + repo: fontawesome/brands/github markdown_extensions: - attr_list - md_in_html diff --git a/nomad.yaml b/nomad.yaml deleted file mode 100644 index 2c64878..0000000 --- a/nomad.yaml +++ /dev/null @@ -1,13 +0,0 @@ -normalize: - normalizers: - include: - - MetainfoNormalizer -plugins: - # We only include our schema here. 
Without the explicit include, all plugins will be - # loaded. Many build in plugins require more dependencies. Install nomad-lab[parsing] - # to make all default plugins work. - include: - - 'parsers/magres' - options: - parsers/magres: - python_package: nomad_parser_magres \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 69985e7..720bb43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,63 +1,95 @@ [build-system] -requires = [ - "setuptools>=61.0.0", - "setuptools-scm>=8.0", -] +requires = ["setuptools>=61.0.0", "setuptools-scm>=8.0"] build-backend = "setuptools.build_meta" [project] +classifiers = [ + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "License :: OSI Approved :: Apache Software License", +] name = "nomad-parser-magres" +description = "NOMAD parser plugin for the magres file format." dynamic = ["version"] -description = "A NOMAD plugin for parsing magres files." readme = "README.md" +requires-python = ">=3.9" authors = [ - { name = "Jose M. 
Pizarro", email = "jose.pizarro@physik.hu-berlin.de" } + { name = "The CCP-NC Authors" }, ] -license = { text = "Apache-2.0" } -requires-python = ">=3.9" -classifiers = [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", - "License :: OSI Approved :: Apache Software License", +maintainers = [ + { name = "The CCP-NC Authors" }, ] +license = { file = "LICENSE" } dependencies = [ - "nomad-lab@git+https://github.com/nomad-coe/nomad.git@develop", - "nomad-schema-plugin-simulation-workflow@git+https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git@develop", - "nomad-schema-plugin-run@git+https://github.com/nomad-coe/nomad-schema-plugin-run.git@develop", + "nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@6b7149a71b2999abbb2225fcb67a5acafc811806", + "nomad-simulations>=0.1.2", ] [project.urls] -"Homepage" = "https://github.com/nomad-coe/nomad-parser-magres" -"Bug Tracker" = "https://github.com/nomad-coe/nomad-parser-magres/issues" -"Documentation" = "https://nomad-coe.github.io/nomad-parser-magres/" +"Homepage" = "https://github.com/CCP-NC/nomad-parser-magres" +"Bug Tracker" = "https://github.com/CCP-NC/nomad-parser-magres/issues" +"Documentation" = "https://CCP-NC.github.io/nomad-parser-magres/" [project.optional-dependencies] dev = [ + "mypy==1.0.1", "ruff", "pytest", - "structlog==22.3.0", - "python-logstash==0.4.6", - "mongoengine>=0.20", - "pyjwt[crypto]==2.6.0", - "unidecode==1.3.2", - "fastapi==0.92.0", - "zipstream-new==1.1.5", - "lxml_html_clean>=0.1.0", + "pytest-timeout", + "pytest-cov", + "structlog", + "nomad-lab[infrastructure]", # for search and MetadataRequired to work ] [tool.ruff] -include = ["src/nomad_parser_magres/*.py", "tests/*.py"] -lint.select = [ +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + + +[tool.ruff.lint] +select = [ "E", # pycodestyle "W", # pycodestyle "PL", # pylint + "F", # Pyflakes + "UP", # pyupgrade + "I", # isort ] -lint.ignore = [ + +ignore = [ "E501", # Line too long ({width} > {limit} characters) "E701", # Multiple statements on one line (colon) "E731", # Do not assign a lambda expression, use a def "E402", # Module level import not at top of file + "F401", # Module imported but unused "PLR0911", # Too many return statements "PLR0912", # Too many branches "PLR0913", # Too many arguments in function definition @@ -68,14 +100,11 @@ lint.ignore = [ "PLR1714", # consider-using-in "PLR5501", # else-if-used ] -lint.fixable = ["ALL"] -# Same as Black. -line-length = 88 -indent-width = 4 +fixable = ["ALL"] -# Assume Python 3.9 -target-version = "py39" +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" # this is entirely optional, you can remove this if you wish to [tool.ruff.format] @@ -91,9 +120,20 @@ skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. 
line-ending = "auto" +[tool.setuptools] +package-dir = { "" = "src" } [tool.setuptools.packages.find] where = ["src"] -[tool.setuptools.package-data] -nomad_parser_magres = ['*/nomad_plugin.yaml'] \ No newline at end of file +[tool.setuptools_scm] +write_to = "src/nomad_parser_magres/_version.py" + +[project.entry-points.'nomad.plugin'] +nomad_parser_magres_plugin = "nomad_parser_magres.parsers:nomad_parser_magres_plugin" +nomad_parser_magres_schema = "nomad_parser_magres.schema_packages:nomad_parser_magres_schema" + + +[tool.cruft] +# Avoid updating workflow files, this leads to permissions issues +skip = [".github/*"] diff --git a/requirements_docs.txt b/requirements_docs.txt new file mode 100644 index 0000000..bacf1ed --- /dev/null +++ b/requirements_docs.txt @@ -0,0 +1,4 @@ +mkdocs +mkdocs-material==8.1.1 +pymdown-extensions +mkdocs-click diff --git a/src/nomad_parser_magres/__init__.py b/src/nomad_parser_magres/__init__.py index a395bda..ce2e284 100644 --- a/src/nomad_parser_magres/__init__.py +++ b/src/nomad_parser_magres/__init__.py @@ -1 +1,4 @@ -from .parser import MagresParser +try: + from ._version import version as __version__ +except ImportError: + __version__ = '' diff --git a/src/nomad_parser_magres/__main__.py b/src/nomad_parser_magres/__main__.py deleted file mode 100644 index e9ed458..0000000 --- a/src/nomad_parser_magres/__main__.py +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -import sys -import json -import logging - -from nomad.utils import configure_logging -from nomad.datamodel import EntryArchive -from nomad_parser_magres.parser import MagresParser - -if __name__ == '__main__': - configure_logging(console_log_level=logging.DEBUG) - archive = EntryArchive() - MagresParser().parse(sys.argv[1], archive, logging) - json.dump(archive.m_to_dict(), sys.stdout, indent=2) diff --git a/src/nomad_parser_magres/nomad_plugin.yaml b/src/nomad_parser_magres/nomad_plugin.yaml deleted file mode 100644 index abd66c4..0000000 --- a/src/nomad_parser_magres/nomad_plugin.yaml +++ /dev/null @@ -1,8 +0,0 @@ -plugin_type: parser -name: parsers/magres -description: | - This plugin is used to parsed magres files into the NOMAD schema. -mainfile_contents_re: \$magres-abinitio-v(\d\.)+ -mainfile_name_re: ^.*\.magres -parser_class_name: nomad_parser_magres.MagresParser -code_name: Magres \ No newline at end of file diff --git a/src/nomad_parser_magres/parser.py b/src/nomad_parser_magres/parser.py deleted file mode 100644 index f73313e..0000000 --- a/src/nomad_parser_magres/parser.py +++ /dev/null @@ -1,504 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os -import numpy as np -import logging - -from nomad.units import ureg -from nomad.parsing.file_parser import TextParser, Quantity -from runschema.run import Run, Program -from runschema.method import ( - Functional, - Method, - DFT, - XCFunctional, - BasisSetContainer, - BasisSet, - KMesh, -) -from runschema.system import System, Atoms -from runschema.calculation import ( - Calculation, - MagneticSusceptibility, - MagneticShielding, - ElectricFieldGradient, - SpinSpinCoupling, -) -from nomad_parser_magres.schema import m_package - -# For the automatic workflow NMR -from nomad.search import search -from nomad.app.v1.models import MetadataRequired -from .utils import BeyondDFTWorkflowsParser - - -re_float = r' *[-+]?\d+\.\d*(?:[Ee][-+]\d+)? *' - - -class MagresFileParser(TextParser): - def __init__(self): - super().__init__() - - def init_quantities(self): - self._quantities = [ - Quantity('lattice_units', r'units *lattice *([a-zA-Z]+)'), - Quantity('atom_units', r'units *atom *([a-zA-Z]+)'), - Quantity('ms_units', r'units *ms *([a-zA-Z]+)'), - Quantity('efg_units', r'units *efg *([a-zA-Z]+)'), - Quantity('efg_local_units', r'units *efg_local *([a-zA-Z]+)'), - Quantity('efg_nonlocal_units', r'units *efg_nonlocal *([a-zA-Z]+)'), - Quantity('isc_units', r'units *isc *([a-zA-Z\^\d\.\-]+)'), - Quantity('isc_fc_units', r'units *isc_fc *([a-zA-Z\^\d\.\-]+)'), - Quantity('isc_spin_units', r'units *isc_spin *([a-zA-Z\^\d\.\-]+)'), - Quantity( - 'isc_orbital_p_units', r'units *isc_orbital_p *([a-zA-Z\^\d\.\-]+)' - ), - Quantity( - 'isc_orbital_d_units', r'units *isc_orbital_d *([a-zA-Z\^\d\.\-]+)' - ), - Quantity('sus_units', r'units *sus *([a-zA-Z\^\d\.\-]+)'), - Quantity('cutoffenergy_units', rf'units *calc\_cutoffenergy *([a-zA-Z]+)'), - Quantity( - 'calculation', - r'([\[\<]*calculation[\>\]]*[\s\S]+?)(?:[\[\<]*\/calculation[\>\]]*)', - sub_parser=TextParser( - quantities=[ - Quantity('code', r'calc\_code *([a-zA-Z]+)'), - Quantity( - 'code_version', 
r'calc\_code\_version *([a-zA-Z\d\.]+)' - ), - Quantity( - 'code_hgversion', - r'calc\_code\_hgversion ([a-zA-Z\d\:\+\s]*)\n', - flatten=False, - ), - Quantity( - 'code_platform', r'calc\_code\_platform *([a-zA-Z\d\_]+)' - ), - Quantity('name', r'calc\_name *([\w]+)'), - Quantity('comment', r'calc\_comment *([\w]+)'), - Quantity('xcfunctional', r'calc\_xcfunctional *([\w]+)'), - Quantity( - 'cutoffenergy', - rf'calc\_cutoffenergy({re_float})(?P<__unit>\w+)', - ), - Quantity( - 'pspot', - r'calc\_pspot *([\w]+) *([\w\.\|\(\)\=\:]+)', - repeats=True, - ), - Quantity( - 'kpoint_mp_grid', - r'calc\_kpoint\_mp\_grid *([\w]+) *([\w]+) *([\w]+)', - ), - Quantity( - 'kpoint_mp_offset', - rf'calc\_kpoint\_mp\_offset({re_float*3})$', - ), - ] - ), - ), - Quantity( - 'atoms', - r'([\[\<]*atoms[\>\]]*[\s\S]+?)(?:[\[\<]*\/atoms[\>\]]*)', - sub_parser=TextParser( - quantities=[ - Quantity('lattice', rf'lattice({re_float*9})'), - Quantity('symmetry', r'symmetry *([\w\-\+\,]+)', repeats=True), - Quantity( - 'atom', - rf'atom *([a-zA-Z]+) *[a-zA-Z\d]* *([\d]+) *({re_float*3})', - repeats=True, - ), - ] - ), - ), - Quantity( - 'magres', - r'([\[\<]*magres[\>\]]*[\s\S]+?)(?:[\[\<]*\/magres[\>\]]*)', - sub_parser=TextParser( - quantities=[ - Quantity( - 'ms', rf'ms *(\w+) *(\d+)({re_float*9})', repeats=True - ), - Quantity( - 'efg', rf'efg *(\w+) *(\d+)({re_float*9})', repeats=True - ), - Quantity( - 'efg_local', - rf'efg_local *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity( - 'efg_nonlocal', - rf'efg_nonlocal *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity( - 'isc', - rf'isc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity( - 'isc_fc', - rf'isc_fc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity( - 'isc_orbital_p', - rf'isc_orbital_p *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity( - 'isc_orbital_d', - rf'isc_orbital_d *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', - 
repeats=True, - ), - Quantity( - 'isc_spin', - rf'isc_spin *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', - repeats=True, - ), - Quantity('sus', rf'sus *({re_float*9})', repeats=True), - ] - ), - ), - ] - - -class MagresParser(BeyondDFTWorkflowsParser): - level = 1 - - def __init__(self): - self.magres_file_parser = MagresFileParser() - - self._xc_functional_map = { - 'LDA': ['LDA_C_PZ', 'LDA_X_PZ'], - 'PW91': ['GGA_C_PW91', 'GGA_X_PW91'], - 'PBE': ['GGA_C_PBE', 'GGA_X_PBE'], - 'RPBE': ['GGA_X_RPBE'], - 'WC': ['GGA_C_PBE_GGA_X_WC'], - 'PBESOL': ['GGA_X_RPBE'], - 'BLYP': ['GGA_C_LYP', 'LDA_X_B88'], - 'B3LYP': ['HYB_GGA_XC_B3LYP5'], - 'HF': ['HF_X'], - 'HF-LDA': ['HF_X_LDA_C_PW'], - 'PBE0': ['HYB_GGA_XC_PBEH'], - 'HSE03': ['HYB_GGA_XC_HSE03'], - 'HSE06': ['HYB_GGA_XC_HSE06'], - 'RSCAN': ['MGGA_X_RSCAN', 'MGGA_C_RSCAN'], - } - - def init_parser(self): - self.magres_file_parser.mainfile = self.filepath - self.magres_file_parser.logger = self.logger - - def _check_units_magres(self): - """ - Check if the units of the NMR quantities are magres standard. If not, a warning - is issued and the default units are used. - """ - allowed_units = { - 'lattice': 'Angstrom', - 'atom': 'Angstrom', - 'ms': 'ppm', - 'efg': 'au', - 'efg_local': 'au', - 'efg_nonlocal': 'au', - 'isc': '10^19.T^2.J^-1', - 'isc_fc': '10^19.T^2.J^-1', - 'isc_orbital_p': '10^19.T^2.J^-1', - 'isc_orbital_d': '10^19.T^2.J^-1', - 'isc_spin': '10^19.T^2.J^-1', - 'sus': '10^-6.cm^3.mol^-1', - } - for key, value in allowed_units.items(): - data = self.magres_file_parser.get(f'{key}_units', '') - if data and data != value: - self.logger.warning( - f'The units of the NMR quantities are not parsed if they are not magres standard. 
' - f'We will use the default units.', - data={ - 'quantities': key, - 'standard_units': value, - 'parsed_units': data, - }, - ) - - def parse_system(self, sec_run: Run): - """ - Parse the System section by extracting information about the atomic structure: - lattice vectors, periodic boundary conditions, atom positions and labels from the - magres file. - - Args: - sec_run (Run): the section Run where System will be added. - """ - sec_atoms = Atoms() - - # Check if [atoms][/atoms] was correctly parsed - atoms = self.magres_file_parser.get('atoms') - if not atoms: - self.logger.warning('Could not find atomic structure in magres file.') - return - - # Store lattice_vectors and periodic boundary conditions - lattice_vectors = np.reshape(np.array(atoms.get('lattice', [])), (3, 3)) - sec_atoms.lattice_vectors = lattice_vectors * ureg.angstrom - pbc = ( - [True, True, True] if lattice_vectors is not None else [False, False, False] - ) - sec_atoms.periodic = pbc - - # Storing atom positions and labels - atoms_list = atoms.get('atom', []) - if len(atoms_list) == 0: - self.logger.warning( - 'Could not find atom positions and labels in magres file.' - ) - return - atom_labels = [] - atom_positions = [] - for atom in atoms_list: - atom_labels.append(atom[0]) - atom_positions.append(atom[2:]) - sec_atoms.labels = atom_labels - sec_atoms.positions = atom_positions * ureg.angstrom - - # Add Atoms to System and this to Run - sec_system = System() - sec_system.atoms = sec_atoms - sec_run.system.append(sec_system) - - def parse_method(self, calculation_params: TextParser, sec_run: Run): - """ - Parse the Method section by extracting information about the NMR method:basis set, - exchange-correlation functional, cutoff energy, and K mesh. - - Note: only CASTEP-like method parameters are currently being supported. - - Args: - calculation_params (TextParser): the parsed [calculation][/calculation] block parameters. - sec_run (Run): the section Run where Method will be added. 
- """ - sec_method = Method(label='NMR') - - # XC functional parsing - sec_dft = DFT() - xc_functional = calculation_params.get('xcfunctional', 'LDA') - xc_functional_labels = self._xc_functional_map.get(xc_functional) - if xc_functional_labels: - sec_xc_functional = XCFunctional() - for functional in xc_functional_labels: - sec_functional = Functional(name=functional) - if '_X_' in functional or functional.endswith('_X'): - sec_xc_functional.exchange.append(sec_functional) - elif '_C_' in functional or functional.endswith('_C'): - sec_xc_functional.correlation.append(sec_functional) - elif 'HYB' in functional: - sec_xc_functional.hybrid.append(sec_functional) - else: - sec_xc_functional.contributions.append(sec_functional) - sec_dft.xc_functional = sec_xc_functional - sec_method.dft = sec_dft - - # Basis set parsing (adding cutoff energies units check) - cutoff = calculation_params.get('cutoffenergy') - if cutoff.dimensionless: - cutoff_units = self.magres_file_parser.get('cutoffenergy_units', 'eV') - if cutoff_units == 'Hartree': - cutoff_units = 'hartree' - cutoff = cutoff.magnitude * ureg(cutoff_units) - sec_basis_set = BasisSetContainer( - type='plane waves', - scope=['wavefunction'], - basis_set=[BasisSet(scope=['valence'], type='plane waves', cutoff=cutoff)], - ) - sec_method.electrons_representation.append(sec_basis_set) - - # KMesh parsing - sec_k_mesh = KMesh( - grid=calculation_params.get('kpoint_mp_grid', [1, 1, 1]), - offset=calculation_params.get('kpoint_mp_offset', [0, 0, 0]), - ) - sec_method.k_mesh = sec_k_mesh - - # Add Method to Run - sec_run.method.append(sec_method) - - def parse_calculation(self, sec_run: Run): - """ - Parse the Calculation section by extracting information about the magnetic outputs - in the magres file: magnetic shielding tensor, electric field gradient, indirect - spin-spin coupling, and magnetic susceptibility. It also stores references to the - System and Method sections. 
- - Args: - sec_run (Run): the section Run where System will be added. - """ - # Check if [magres][/magres] was correctly parsed - magres_data = self.magres_file_parser.get('magres') - if not magres_data: - self.logger.warning('Could not find [magres] data block in magres file.') - return - - # Creating Calculation and adding System and Method refs - sec_scc = Calculation() - sec_scc.system_ref = sec_run.system[-1] - sec_scc.method_ref = sec_run.method[-1] - atom_labels = sec_scc.system_ref.atoms.labels - if not atom_labels: - self.logger.warning('Could not find the parsed atomic cell information.') - return - n_atoms = len(atom_labels) - - # Magnetic Shielding Tensor (ms) parsing - data = magres_data.get('ms', []) - if np.size(data) == n_atoms * (9 + 2): # 2 extra columns with atom labels - values = np.reshape([d[2:] for d in data], (n_atoms, 3, 3)) - values = np.transpose(values, axes=(0, 2, 1)) - isotropic_value = np.trace(values, axis1=1, axis2=2) / 3.0 - atoms = np.array([d[:2] for d in data]) - sec_ms = MagneticShielding(atoms=atoms) - sec_ms.value = values * 1e-6 * ureg('dimensionless') - sec_ms.isotropic_value = isotropic_value * 1e-6 * ureg('dimensionless') - sec_scc.magnetic_shielding.append(sec_ms) - - # Electric Field Gradient (efg) parsing - efg_contributions = { - 'efg_local': 'local', - 'efg_nonlocal': 'non_local', - 'efg': 'total', - } - for tag, contribution in efg_contributions.items(): - data = magres_data.get(tag, []) - if np.size(data) != n_atoms * (9 + 2): # 2 extra columns with atom labels - continue - values = np.reshape([d[2:] for d in data], (n_atoms, 3, 3)) - values = np.transpose(values, axes=(0, 2, 1)) - atoms = np.array([d[:2] for d in data]) - sec_efg = ElectricFieldGradient(atoms=atoms) - sec_efg.contribution = contribution - sec_efg.value = values * 9.717362e21 * ureg('V/m^2') - sec_scc.electric_field_gradient.append(sec_efg) - - # Indirect Spin-Spin Coupling (isc) parsing - isc_contributions = { - 'isc_fc': 'fermi_contact', - 
'isc_orbital_p': 'orbital_paramagnetic', - 'isc_orbital_d': 'orbital_diamagnetic', - 'isc_spin': 'spin_dipolar', - 'isc': 'total', - } - for tag, contribution in isc_contributions.items(): - # TODO the data is organized differently to the NOMAD metainfo, we need to transform it properly - data = magres_data.get(tag, []) - if np.size(data) != n_atoms**2 * ( - 9 + 4 - ): # 4 extra columns with atom labels - continue - values = np.reshape([d[4:] for d in data], (n_atoms, n_atoms, 3, 3)) - values = np.transpose(values, axes=(0, 1, 3, 2)) - atoms = np.array([d[:4] for d in data]) - atoms_1 = atoms[:, 0:2] - atoms_2 = atoms[:, 2:4] - sec_isc = SpinSpinCoupling(atoms_1=atoms_1, atoms_2=atoms_2) - sec_isc.contribution = contribution - sec_isc.reduced_value = values * 1e19 * ureg('K^2/J') - sec_scc.spin_spin_coupling.append(sec_isc) - - # Magnetic Susceptibility (sus) parsing - data = magres_data.get('sus', []) - if np.size(data) == 9: - values = np.transpose(np.reshape(data, (3, 3))) - sec_sus = MagneticSusceptibility() - sec_sus.scale_dimension = 'macroscopic' - sec_sus.value = values * 1e-6 * ureg('dimensionless') - sec_scc.magnetic_susceptibility.append(sec_sus) - - # Add Calculation to Run - sec_run.calculation.append(sec_scc) - - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.logger = logger if logger is not None else logging.getLogger(__name__) - - self.init_parser() - self._check_units_magres() - - # Create Run with Program information - sec_run = Run() - calculation_params = self.magres_file_parser.get('calculation', {}) - program_name = calculation_params.get('code', '') - if program_name != 'CASTEP': - self.logger.error( - 'Only CASTEP-based NMR simulations are supported by the ' - 'magres parser.' 
- ) - return - sec_run.program = Program( - name=program_name, - version=calculation_params.get('code_version', ''), - ) - - # Parse main sections under Run - self.parse_system(sec_run) - - self.parse_method(calculation_params, sec_run) - - self.parse_calculation(sec_run) - - # Add run to the Archive - self.archive.run.append(sec_run) - - # We try to resolve the entry_id and mainfile of other entries in the upload - filepath_stripped = self.filepath.split('raw/')[-1] - metadata = [] - try: - upload_id = self.archive.metadata.upload_id - search_ids = search( - owner='visible', - user_id=self.archive.metadata.main_author.user_id, - query={'upload_id': upload_id}, - required=MetadataRequired(include=['entry_id', 'mainfile']), - ).data - metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids] - except Exception: - self.logger.warning( - 'Could not resolve the entry_id and mainfile of other entries in the upload.' - ) - return - for entry_id, mainfile in metadata: - if mainfile == filepath_stripped: # we skip the current parsed mainfile - continue - # We try to load the archive from its context and connect both the CASTEP - # and the magres entries - try: - entry_archive = archive.m_context.load_archive( - entry_id, upload_id, None - ) - method_label = entry_archive.run[-1].method[-1].label - if method_label == 'NMR': - castep_archive = entry_archive - # We write the workflow NMRMagRes directly in the magres entry - self.parse_nmr_magres_file_format(castep_archive) - break - except Exception: - continue diff --git a/src/nomad_parser_magres/parsers/__init__.py b/src/nomad_parser_magres/parsers/__init__.py new file mode 100644 index 0000000..b33fe74 --- /dev/null +++ b/src/nomad_parser_magres/parsers/__init__.py @@ -0,0 +1,21 @@ +from nomad.config.models.plugins import ParserEntryPoint +from pydantic import Field + + +class MagresParserEntryPoint(ParserEntryPoint): + parameter: int = Field(0, description='Custom configuration parameter') + + def load(self): 
+ from nomad_parser_magres.parsers.parser import MagresParser + + return MagresParser(**self.dict()) + + +nomad_parser_magres_plugin = MagresParserEntryPoint( + name='MagresParserEntryPoint', + description='Entry point for the magres parser.', + level=1, + parser_as_interface=False, # in order to use `child_archives` and auto workflows + mainfile_contents_re=r'\$magres-abinitio-v(\d\.)+', + # mainfile_name_re='^.*magres', +) diff --git a/src/nomad_parser_magres/parsers/parser.py b/src/nomad_parser_magres/parsers/parser.py new file mode 100644 index 0000000..8e882bd --- /dev/null +++ b/src/nomad_parser_magres/parsers/parser.py @@ -0,0 +1,756 @@ +import os +from typing import TYPE_CHECKING, Optional + +import numpy as np + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from nomad_simulations.schema_packages.model_system import Cell + from structlog.stdlib import BoundLogger + +from nomad.app.v1.models.models import MetadataRequired +from nomad.config import config +from nomad.datamodel.metainfo.workflow import Link, TaskReference +from nomad.parsing.file_parser import Quantity, TextParser +from nomad.search import search +from nomad.units import ureg +from nomad.utils import extract_section +from nomad_simulations.schema_packages.atoms_state import AtomsState +from nomad_simulations.schema_packages.general import Program +from nomad_simulations.schema_packages.model_method import ( + DFT, + ModelMethod, + XCFunctional, +) +from nomad_simulations.schema_packages.model_system import AtomicCell, ModelSystem +from nomad_simulations.schema_packages.numerical_settings import KMesh, KSpace + +# utility function used to get auxiliary files next to the `mainfile` +from nomad_parser_magres.parsers.utils import get_files +from nomad_parser_magres.schema_packages.ccpnc_metadata import CCPNCMetadata +from nomad_parser_magres.schema_packages.package import CCPNCSimulation as Simulation +from nomad_parser_magres.schema_packages.package import ( + 
ElectricFieldGradient, + MagneticShieldingTensor, + MagneticSusceptibility, + Outputs, + SpinSpinCoupling, +) +from nomad_parser_magres.schema_packages.workflow import ( + NMRMagRes, + NMRMagResMethod, + NMRMagResResults, +) + +re_float = r' *[-+]?\d+\.\d*(?:[Ee][-+]\d+)? *' + +configuration = config.get_plugin_entry_point( + 'nomad_parser_magres.parsers:nomad_parser_magres_plugin' +) + + +class MagresFileParser(TextParser): + def __init__(self): + super().__init__() + + def init_quantities(self): + self._quantities = [ + Quantity('lattice_units', r'units *lattice *([a-zA-Z]+)'), + Quantity('atom_units', r'units *atom *([a-zA-Z]+)'), + Quantity('ms_units', r'units *ms *([a-zA-Z]+)'), + Quantity('efg_units', r'units *efg *([a-zA-Z]+)'), + Quantity('efg_local_units', r'units *efg_local *([a-zA-Z]+)'), + Quantity('efg_nonlocal_units', r'units *efg_nonlocal *([a-zA-Z]+)'), + Quantity('isc_units', r'units *isc *([a-zA-Z\^\d\.\-]+)'), + Quantity('isc_fc_units', r'units *isc_fc *([a-zA-Z\^\d\.\-]+)'), + Quantity('isc_spin_units', r'units *isc_spin *([a-zA-Z\^\d\.\-]+)'), + Quantity( + 'isc_orbital_p_units', r'units *isc_orbital_p *([a-zA-Z\^\d\.\-]+)' + ), + Quantity( + 'isc_orbital_d_units', r'units *isc_orbital_d *([a-zA-Z\^\d\.\-]+)' + ), + Quantity('sus_units', r'units *sus *([a-zA-Z\^\d\.\-]+)'), + Quantity('cutoffenergy_units', r'units *calc\_cutoffenergy *([a-zA-Z]+)'), + Quantity( + 'calculation', + r'([\[\<]*calculation[\>\]]*[\s\S]+?)(?:[\[\<]*\/calculation[\>\]]*)', + sub_parser=TextParser( + quantities=[ + Quantity('code', r'calc\_code *([a-zA-Z]+)'), + Quantity( + 'code_version', r'calc\_code\_version *([a-zA-Z\d\.]+)' + ), + Quantity( + 'code_hgversion', + r'calc\_code\_hgversion ([a-zA-Z\d\:\+\s]*)\n', + flatten=False, + ), + Quantity( + 'code_platform', r'calc\_code\_platform *([a-zA-Z\d\_]+)' + ), + Quantity('name', r'calc\_name *([\w]+)'), + Quantity('comment', r'calc\_comment *([\w]+)'), + Quantity('xcfunctional', r'calc\_xcfunctional *([\w]+)'), + 
Quantity( + 'cutoffenergy', + rf'calc\_cutoffenergy({re_float})(?P<__unit>\w+)', + ), + Quantity( + 'pspot', + r'calc\_pspot *([\w]+) *([\w\.\|\(\)\=\:]+)', + repeats=True, + ), + Quantity( + 'kpoint_mp_grid', + r'calc\_kpoint\_mp\_grid *([\w]+) *([\w]+) *([\w]+)', + ), + Quantity( + 'kpoint_mp_offset', + rf'calc\_kpoint\_mp\_offset({re_float*3})$', + ), + ] + ), + ), + Quantity( + 'atoms', + r'([\[\<]*atoms[\>\]]*[\s\S]+?)(?:[\[\<]*\/atoms[\>\]]*)', + sub_parser=TextParser( + quantities=[ + Quantity('lattice', rf'lattice({re_float*9})'), + Quantity('symmetry', r'symmetry *([\w\-\+\,]+)', repeats=True), + Quantity( + 'atom', + rf'atom *([a-zA-Z]+) *[a-zA-Z\d]* *([\d]+) *({re_float*3})', + repeats=True, + ), + ] + ), + ), + Quantity( + 'magres', + r'([\[\<]*magres[\>\]]*[\s\S]+?)(?:[\[\<]*\/magres[\>\]]*)', + sub_parser=TextParser( + quantities=[ + Quantity( + 'ms', rf'ms *(\w+) *(\d+)({re_float*9})', repeats=True + ), + Quantity( + 'efg', rf'efg *(\w+) *(\d+)({re_float*9})', repeats=True + ), + Quantity( + 'efg_local', + rf'efg_local *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'efg_nonlocal', + rf'efg_nonlocal *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'isc', + rf'isc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'isc_fc', + rf'isc_fc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'isc_orbital_p', + rf'isc_orbital_p *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'isc_orbital_d', + rf'isc_orbital_d *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity( + 'isc_spin', + rf'isc_spin *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + repeats=True, + ), + Quantity('sus', rf'sus *({re_float*9})', repeats=True), + ] + ), + ), + ] + + +class MagresParser: + def __init__(self, *args, **kwargs): + super().__init__() + self.magres_file_parser = MagresFileParser() + + self._xc_functional_map = { + 'LDA': ['LDA_C_PZ', 
def _check_units_magres(self, logger: 'BoundLogger') -> None:
    """
    Warn about unit declarations in the magres file that differ from the magres standard.

    For every known quantity, the `<quantity>_units` value matched by the file parser is
    compared against the standard unit listed below. A mismatch is only logged; no unit
    conversion is performed here.

    Args:
        logger (BoundLogger): The logger to log messages.
    """
    standard_units = {
        'lattice': 'Angstrom',
        'atom': 'Angstrom',
        'ms': 'ppm',
        'efg': 'au',
        'efg_local': 'au',
        'efg_nonlocal': 'au',
        'isc': '10^19.T^2.J^-1',
        'isc_fc': '10^19.T^2.J^-1',
        'isc_orbital_p': '10^19.T^2.J^-1',
        'isc_orbital_d': '10^19.T^2.J^-1',
        'isc_spin': '10^19.T^2.J^-1',
        'sus': '10^-6.cm^3.mol^-1',
    }
    for quantity, standard in standard_units.items():
        parsed = self.magres_file_parser.get(f'{quantity}_units', '')
        # Nothing to report when the unit line is absent or already standard
        if not parsed or parsed == standard:
            continue
        logger.warning(
            'The units of the NMR quantities are not parsed if they are not magres standard. '
            'We will use the default units.',
            data={
                'quantities': quantity,
                'standard_units': standard,
                'parsed_units': parsed,
            },
        )

def init_parser(self, logger: 'BoundLogger') -> None:
    """
    Initialize the `MagresFileParser` with the mainfile and logger.

    Args:
        logger (BoundLogger): The logger to log messages.
    """
    self.magres_file_parser.mainfile = self.mainfile
    self.magres_file_parser.logger = logger

def parse_atomic_cell(
    self, atoms: Optional[TextParser], logger: 'BoundLogger'
) -> Optional[AtomicCell]:
    """
    Parse the `AtomicCell` section from the magres file.

    Args:
        atoms (Optional[TextParser]): The parsed text section [atoms][/atoms] of the magres file.
        logger (BoundLogger): The logger to log messages.

    Returns:
        Optional[AtomicCell]: The parsed `AtomicCell` section, or `None` if the [atoms] block,
        the lattice or the atom list could not be parsed.
    """
    # Check if [atoms][/atoms] was correctly parsed
    if not atoms:
        logger.warning('Could not find atomic structure in magres file.')
        return None
    atomic_cell = AtomicCell()

    # Parse `lattice_vectors` and `periodic_boundary_conditions`
    try:
        # `np.reshape` raises when the lattice line is missing or does not hold 9 numbers
        lattice_vectors = np.reshape(np.array(atoms.get('lattice', [])), (3, 3))
        atomic_cell.lattice_vectors = lattice_vectors * ureg.angstrom
        # Reaching this line means a lattice was found, so the cell is periodic in all
        # directions (the former `lattice_vectors is not None` test could never fail).
        atomic_cell.periodic_boundary_conditions = [True, True, True]
    except Exception:
        logger.warning(
            'Could not parse `lattice_vectors` and `periodic_boundary_conditions`.'
        )
        return None

    # Parse `positions` and `AtomsState` list
    atoms_list = atoms.get('atom', [])
    if len(atoms_list) == 0:
        logger.warning(
            'Could not find atom `positions` and their chemical symbols in magres file.'
        )
        return None
    positions = []
    atoms_states = []
    for atom in atoms_list:
        # Each matched `atom` entry starts with the species; everything from the third
        # element on is used as the position.
        atoms_states.append(AtomsState(chemical_symbol=atom[0]))
        positions.append(atom[2:])
    atomic_cell.positions = positions * ureg.angstrom
    atomic_cell.atoms_state = atoms_states
    return atomic_cell
def parse_model_system(self, logger: 'BoundLogger') -> Optional[ModelSystem]:
    """
    Parse the `ModelSystem` section from the magres file if the [atoms][/atoms] section
    in the magres file was correctly matched.

    Args:
        logger (BoundLogger): The logger to log messages.

    Returns:
        Optional[ModelSystem]: The parsed `ModelSystem` section, or `None` if the
        [atoms] block is missing. The returned section has no `cell` when the atomic
        cell could not be parsed.
    """
    # Check if [atoms][/atoms] was correctly parsed
    atoms = self.magres_file_parser.get('atoms')
    if not atoms:
        logger.warning('Could not find atomic structure in magres file.')
        return None

    # Parse `ModelSystem` and its `cell`
    model_system = ModelSystem()
    model_system.is_representative = True
    atomic_cell = self.parse_atomic_cell(atoms=atoms, logger=logger)
    # `parse_atomic_cell` returns None on failure (and has already logged why);
    # never append that None to the `cell` sub-section list.
    if atomic_cell is not None:
        model_system.cell.append(atomic_cell)
    return model_system
def parse_xc_functional(
    self, calculation_params: Optional[TextParser]
) -> list[XCFunctional]:
    """
    Parse the exchange-correlation functional information from the magres file. This
    uses the `libxc` naming convention.

    Args:
        calculation_params (Optional[TextParser]): The parsed [calculation][/calculation] block parameters.
            `None` is treated like an empty block, i.e. the 'LDA' default is used.

    Returns:
        list[XCFunctional]: The parsed `XCFunctional` sections (empty if the functional
        label is not in the internal map).
    """
    if calculation_params is None:
        calculation_params = {}
    xc_functional = calculation_params.get('xcfunctional', 'LDA')
    xc_functional_labels = self._xc_functional_map.get(xc_functional, [])
    xc_sections = []
    for xc in xc_functional_labels:
        functional = XCFunctional(libxc_name=xc)
        # Labels may carry the X/C marker in the middle ('GGA_X_PBE') or at the end
        # ('HF_X'); both forms must be recognized.
        if '_X_' in xc or xc.endswith('_X'):
            functional.name = 'exchange'
        elif '_C_' in xc or xc.endswith('_C'):
            functional.name = 'correlation'
        elif 'HYB' in xc:
            functional.name = 'hybrid'
        else:
            functional.name = 'contribution'
        xc_sections.append(functional)
    return xc_sections

def parse_model_method(
    self, calculation_params: Optional[TextParser]
) -> ModelMethod:
    """
    Parse the `ModelMethod` section by extracting information about the NMR method:
    exchange-correlation functional and K mesh (basis set / cutoff energy parsing is
    still disabled, see the TODO below).

    Note: only CASTEP-like method parameters are currently being supported.

    Args:
        calculation_params (Optional[TextParser]): The parsed [calculation][/calculation] block parameters.
            `None` is treated like an empty block, so all defaults apply.

    Returns:
        ModelMethod: The parsed `DFT` section named 'NMR'.
    """
    if calculation_params is None:
        calculation_params = {}
    model_method = DFT(name='NMR')

    # Parse `XCFunctional` information
    xc_functionals = self.parse_xc_functional(calculation_params=calculation_params)
    if len(xc_functionals) > 0:
        model_method.xc_functionals = xc_functionals

    # TODO add when @ndaelman-hu finishes implementation of `BasisSet`
    # # Basis set parsing (adding cutoff energies units check)
    # cutoff = calculation_params.get('cutoffenergy')
    # if cutoff.dimensionless:
    #     cutoff_units = self.magres_file_parser.get('cutoffenergy_units', 'eV')
    #     if cutoff_units == 'Hartree':
    #         cutoff_units = 'hartree'
    #     cutoff = cutoff.magnitude * ureg(cutoff_units)
    # sec_basis_set = BasisSetContainer(
    #     type='plane waves',
    #     scope=['wavefunction'],
    #     basis_set=[BasisSet(scope=['valence'], type='plane waves', cutoff=cutoff)],
    # )
    # sec_method.electrons_representation.append(sec_basis_set)

    # Parse `KSpace` as a `NumericalSettings` section
    k_mesh = KMesh(
        grid=calculation_params.get('kpoint_mp_grid', [1, 1, 1]),
        offset=calculation_params.get('kpoint_mp_offset', [0, 0, 0]),
    )
    model_method.numerical_settings.append(KSpace(k_mesh=[k_mesh]))

    return model_method
def parse_magnetic_shieldings(
    self, magres_data: TextParser, cell: 'Cell', logger: 'BoundLogger'
) -> list[MagneticShieldingTensor]:
    """
    Build one `MagneticShieldingTensor` per matched `ms` entry of the [magres] block.

    The k-th matched entry is referenced to the k-th `AtomsState` of `cell`
    (NOTE(review): this presumes the `ms` entries follow the atom order of the
    [atoms] block — confirm against the magres files in use).

    Args:
        magres_data (TextParser): The parsed [magres][/magres] block.
        cell (Cell): The parsed `Cell` section.
        logger (BoundLogger): The logger to log messages.

    Returns:
        list[MagneticShieldingTensor]: The parsed sections, or an empty list when the
        amount of matched data does not fit the number of atoms.
    """
    n_atoms = len(cell.atoms_state)
    ms_rows = magres_data.get('ms', [])

    # Every entry must hold 2 atom-label columns followed by the 9 tensor components
    columns_per_atom = 9 + 2
    if np.size(ms_rows) != n_atoms * columns_per_atom:
        logger.warning(
            'The shape of the matched text from the magres file for the `ms` does not coincide with the number of atoms.'
        )
        return []

    shieldings = []
    for atom_index in range(len(ms_rows)):
        components = ms_rows[atom_index][2:]
        # The 3x3 tensor is stored transposed with respect to the text order
        tensor = np.reshape(components, (3, 3)).transpose()
        section = MagneticShieldingTensor(entity_ref=cell.atoms_state[atom_index])
        # Scaled by 1e-6 and stored as a dimensionless quantity
        section.value = tensor * 1e-6 * ureg('dimensionless')
        shieldings.append(section)
    return shieldings
def parse_electric_field_gradients(
    self, magres_data: TextParser, cell: 'Cell', logger: 'BoundLogger'
) -> list[ElectricFieldGradient]:
    """
    Parse the electric field gradients from the magres file and assign `entity_ref` to the specific `AtomsState`.

    Each contribution (`efg_local`, `efg_nonlocal`, `efg`) is handled independently: a
    contribution that is absent, or whose matched data does not fit the number of atoms,
    is skipped without discarding the other contributions.

    Args:
        magres_data (TextParser): The parsed [magres][/magres] block.
        cell (Cell): The parsed `Cell` section.
        logger (BoundLogger): The logger to log messages.

    Returns:
        list[ElectricFieldGradient]: The list of parsed `ElectricFieldGradient` sections
        for all contributions that could be parsed.
    """
    n_atoms = len(cell.atoms_state)
    efg_contributions = {
        'efg_local': 'local',
        'efg_nonlocal': 'non_local',
        'efg': 'total',
    }
    electric_field_gradients = []
    for tag, contribution in efg_contributions.items():
        data = magres_data.get(tag, [])

        # A contribution that is simply not present in the file is not an error
        if np.size(data) == 0:
            continue

        # Initial check on the size of the matched text
        if np.size(data) != n_atoms * (9 + 2):  # 2 extra columns with atom labels
            logger.warning(
                f'The shape of the matched text from the magres file for the `{tag}` does not coincide with the number of atoms.'
            )
            # Skip only this contribution; returning here would drop the ones already
            # parsed and never look at the remaining tags.
            continue

        # Parse electric field gradients for each contribution and their refs to the specific `AtomsState`
        for i, atom_data in enumerate(data):
            values = np.transpose(np.reshape(atom_data[2:], (3, 3)))
            sec_efg = ElectricFieldGradient(
                type=contribution, entity_ref=cell.atoms_state[i]
            )
            sec_efg.value = values * 9.717362e21 * ureg('V/m^2')
            electric_field_gradients.append(sec_efg)
    return electric_field_gradients
def parse_spin_spin_couplings(
    self, magres_data: TextParser, cell: 'Cell', logger: 'BoundLogger'
) -> list[SpinSpinCoupling]:
    """
    Parse the spin-spin couplings from the magres file and assign `entity_ref_1` and `entity_ref_2`
    to the specific `AtomsState`.

    Each contribution is handled independently: a contribution that is absent, or whose
    matched data does not fit `n_atoms**2` atom pairs, is skipped without discarding the
    other contributions.

    Args:
        magres_data (TextParser): The parsed [magres][/magres] block.
        cell (Cell): The parsed `Cell` section.
        logger (BoundLogger): The logger to log messages.

    Returns:
        list[SpinSpinCoupling]: The list of parsed `SpinSpinCoupling` sections for all
        contributions that could be parsed.
    """
    n_atoms = len(cell.atoms_state)
    isc_contributions = {
        'isc_fc': 'fermi_contact',
        'isc_orbital_p': 'orbital_paramagnetic',
        'isc_orbital_d': 'orbital_diamagnetic',
        'isc_spin': 'spin_dipolar',
        'isc': 'total',
    }
    spin_spin_couplings = []
    for tag, contribution in isc_contributions.items():
        data = magres_data.get(tag, [])

        # A contribution that is simply not present in the file is not an error
        if np.size(data) == 0:
            continue

        # Initial check on the size of the matched text
        if np.size(data) != n_atoms**2 * (
            9 + 4
        ):  # 4 extra columns with atom labels
            logger.warning(
                f'The shape of the matched text from the magres file for the `{tag}` does not coincide with the number of atoms.'
            )
            # Skip only this contribution instead of discarding everything parsed so far
            continue

        # Parse spin-spin couplings for each contribution and their refs to the specific `AtomsState`.
        # Every matched entry is one flat row: 4 atom-label columns + 9 tensor components.
        # NOTE(review): the pair (i, j) is derived from the row position assuming the
        # n_atoms**2 rows are ordered with the first atom varying slowest — confirm
        # against real `isc` blocks, or resolve the pair from the label columns instead.
        for pair_index, pair_data in enumerate(data):
            i, j = divmod(pair_index, n_atoms)
            values = np.transpose(np.reshape(pair_data[4:], (3, 3)))
            sec_isc = SpinSpinCoupling(
                type=contribution,
                entity_ref_1=cell.atoms_state[i],
                entity_ref_2=cell.atoms_state[j],
            )
            # NOTE(review): the magres standard unit checked elsewhere in this parser is
            # '10^19.T^2.J^-1' while 'K^2/J' is used here — verify the intended unit.
            sec_isc.reduced_value = values * 1e19 * ureg('K^2/J')
            spin_spin_couplings.append(sec_isc)
    return spin_spin_couplings
def parse_magnetic_susceptibilities(
    self, magres_data: TextParser, logger: 'BoundLogger'
) -> list[MagneticSusceptibility]:
    """
    Parse the magnetic susceptibilities from the magres file.

    Args:
        magres_data (TextParser): The parsed [magres][/magres] block.
        logger (BoundLogger): The logger to log messages.

    Returns:
        list[MagneticSusceptibility]: A single-element list with the parsed
        `MagneticSusceptibility` section, or an empty list if the matched `sus` data
        does not hold exactly 9 values.
    """
    data = magres_data.get('sus', [])
    if np.size(data) != 9:
        logger.warning(
            'The shape of the matched text from the magres file for the `sus` does not coincide with 9 (3x3 tensor).'
        )
        return []
    values = np.transpose(np.reshape(data, (3, 3)))
    sec_sus = MagneticSusceptibility(scale_dimension='macroscopic')
    sec_sus.value = values * 1e-6 * ureg('dimensionless')
    return [sec_sus]

def parse_outputs(
    self, simulation: Simulation, logger: 'BoundLogger'
) -> Optional[Outputs]:
    """
    Parse the `Outputs` section. It extracts the information of the [magres][/magres] block and passes
    it as input for parsing the corresponding properties. It also assigns references to the `ModelMethod` and `ModelSystem`
    sections used for the simulation.

    Args:
        simulation (Simulation): The `Simulation` section used to resolve the references.
        logger (BoundLogger): The logger to log messages.

    Returns:
        Optional[Outputs]: The parsed `Outputs` section, or `None` if the model system,
        its cell / `AtomsState` list, or the [magres] block is missing.
    """
    # Initial check on `Simulation.model_system`. An empty list must be caught as well:
    # no `ModelSystem` is appended when the [atoms] block could not be parsed, and
    # indexing `[-1]` on it would raise.
    if not simulation.model_system:
        logger.warning(
            'Could not find the `ModelSystem` that the outputs reference to.'
        )
        return None
    model_system = simulation.model_system[-1]
    if not model_system.cell or not model_system.cell[-1].atoms_state:
        logger.warning(
            'Could not find the `cell` sub-section or the `AtomsState` list under it.'
        )
        return None
    cell = model_system.cell[-1]

    # Check if [magres][/magres] was correctly parsed
    magres_data = self.magres_file_parser.get('magres')
    if not magres_data:
        logger.warning('Could not find [magres] data block in magres file.')
        return None

    outputs = Outputs(
        model_method_ref=simulation.model_method[-1],
        model_system_ref=model_system,
    )

    # Parse `MagneticShieldingTensor`
    ms = self.parse_magnetic_shieldings(
        magres_data=magres_data, cell=cell, logger=logger
    )
    if len(ms) > 0:
        outputs.magnetic_shieldings = ms

    # Parse `ElectricFieldGradient`
    efg = self.parse_electric_field_gradients(
        magres_data=magres_data, cell=cell, logger=logger
    )
    if len(efg) > 0:
        outputs.electric_field_gradients = efg

    # Parse `SpinSpinCoupling`
    isc = self.parse_spin_spin_couplings(
        magres_data=magres_data, cell=cell, logger=logger
    )
    if len(isc) > 0:
        outputs.spin_spin_couplings = isc

    # Parse `MagneticSusceptibility`
    mag_sus = self.parse_magnetic_susceptibilities(
        magres_data=magres_data, logger=logger
    )
    if len(mag_sus) > 0:
        outputs.magnetic_susceptibilities = mag_sus

    return outputs
def parse_nmr_magres_file_format(
    self, nmr_first_principles_archive: 'EntryArchive'
):
    """
    Build the `NMRMagRes` workflow and store it in `self.archive.workflow2`.

    `self.archive` is the magres entry being parsed. Its `data.model_system` and
    `data.outputs` sections are linked as workflow input and output, and the
    `workflow2` of the first-principles entry (if it has one) is added as a task.

    Args:
        nmr_first_principles_archive (EntryArchive): the NMR (first principles) CASTEP or QuantumESPRESSO archive.
    """
    nmr_workflow = NMRMagRes(method=NMRMagResMethod(), results=NMRMagResResults())
    nmr_workflow.name = 'NMR Magres'

    # ! Fix this once CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    # Method
    # method_nmr = extract_section(nmr_first_principles_archive, ['run', 'method'])
    # workflow.method.nmr_method_ref = method_nmr

    # Inputs and Outputs
    # ! Fix this to extract `input_structure` from `nmr_first_principles_archive` once
    # ! CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    structure_section = extract_section(self.archive, ['data', 'model_system'])
    outputs_section = extract_section(self.archive, ['data', 'outputs'])
    if structure_section:
        structure_link = Link(name='Input structure', section=structure_section)
        nmr_workflow.m_add_sub_section(NMRMagRes.inputs, structure_link)
    if outputs_section:
        calculation_link = Link(
            name='Output NMR calculation', section=outputs_section
        )
        nmr_workflow.m_add_sub_section(NMRMagRes.outputs, calculation_link)

    # NMR (first principles) task
    # ! Fix this once CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    # The program name is read from the old-style `run` section of the linked entry
    code_name = nmr_first_principles_archive.run[-1].program.name
    first_principles_workflow = nmr_first_principles_archive.workflow2
    if first_principles_workflow:
        task_reference = TaskReference(task=first_principles_workflow)
        task_reference.name = f'NMR FirstPrinciples {code_name}'
        if structure_section:
            task_reference.inputs = [
                Link(name='Input structure', section=structure_section)
            ]
        if outputs_section:
            task_reference.outputs = [
                Link(name='Output NMR calculation', section=outputs_section)
            ]
        nmr_workflow.m_add_sub_section(NMRMagRes.tasks, task_reference)

    self.archive.workflow2 = nmr_workflow
Fix this once CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data' + program_name = nmr_first_principles_archive.run[-1].program.name + if nmr_first_principles_archive.workflow2: + task = TaskReference(task=nmr_first_principles_archive.workflow2) + task.name = f'NMR FirstPrinciples {program_name}' + if input_structure: + task.inputs = [Link(name='Input structure', section=input_structure)] + if nmr_magres_calculation: + task.outputs = [ + Link( + name='Output NMR calculation', + section=nmr_magres_calculation, + ) + ] + workflow.m_add_sub_section(NMRMagRes.tasks, task) + + self.archive.workflow2 = workflow + + def parse( + self, filepath: str, archive: 'EntryArchive', logger: 'BoundLogger' + ) -> None: + self.mainfile = filepath + self.maindir = os.path.dirname(self.mainfile) + self.basename = os.path.basename(self.mainfile) + self.archive = archive + + self.init_parser(logger=logger) + self._check_units_magres(logger=logger) + + # Adding Simulation to data + simulation = Simulation() + calculation_params = self.magres_file_parser.get('calculation', {}) + if calculation_params.get('code', '') != 'CASTEP': + logger.error( + 'Only CASTEP-based NMR simulations are supported by the magres parser.' + ) + return + simulation.program = Program( + name=calculation_params.get('code', ''), + version=calculation_params.get('code_version', ''), + ) + archive.data = simulation + + # `ModelSystem` parsing + model_system = self.parse_model_system(logger=logger) + if model_system is not None: + simulation.model_system.append(model_system) + + # `ModelMethod` parsing + model_method = self.parse_model_method(calculation_params=calculation_params) + simulation.model_method.append(model_method) + + # `Outputs` parsing + outputs = self.parse_outputs(simulation=simulation, logger=logger) + if outputs is not None: + simulation.outputs.append(outputs) + + # ! 
this will only work after the CASTEP and QE plugin parsers are defined + # Try to resolve the `entry_id` and `mainfile` of other entries in the upload to connect the magres entry with the CASTEP or QuantumESPRESSO entry + filepath_stripped = self.mainfile.split('raw/')[-1] + metadata = [] + try: + upload_id = self.archive.metadata.upload_id + search_ids = search( + owner='visible', + user_id=self.archive.metadata.main_author.user_id, + query={'upload_id': upload_id}, + required=MetadataRequired(include=['entry_id', 'mainfile']), + ).data + metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids] + except Exception: + logger.warning( + 'Could not resolve the entry_id and mainfile of other entries in the upload.' + ) + return + for entry_id, mainfile in metadata: + if mainfile == filepath_stripped: # we skip the current parsed mainfile + continue + # We try to load the archive from its context and connect both the CASTEP and the magres entries + # ? add more checks on the system information for the connection? + try: + entry_archive = self.archive.m_context.load_archive( + entry_id, upload_id, None + ) + # ! Fix this when CASTEP parser uses the new `data` schema + method_label = entry_archive.run[-1].method[-1].label + if method_label == 'NMR': + castep_archive = entry_archive + # We write the workflow NMRMagRes directly in the magres entry + self.parse_nmr_magres_file_format( + nmr_first_principles_archive=castep_archive + ) + break + except Exception: + continue + + # Populate `CCPNCMetadata` (note the `pattern` has to match the aux file generated by the MongoDB CCP-NC) + magres_json_file = get_files( + pattern='magres*.json', filepath=self.mainfile, stripname=self.basename + ) + if magres_json_file is not None: + ccpnc_metadata = CCPNCMetadata() + # TODO: populate `ccpnc_metadata` model from `magres_json_file` HERE + # ... + # ... 
+ simulation.ccpnc_metadata = ccpnc_metadata diff --git a/src/nomad_parser_magres/utils/__init__.py b/src/nomad_parser_magres/parsers/utils/__init__.py similarity index 80% rename from src/nomad_parser_magres/utils/__init__.py rename to src/nomad_parser_magres/parsers/utils/__init__.py index c08f4e8..d5ed6a4 100644 --- a/src/nomad_parser_magres/utils/__init__.py +++ b/src/nomad_parser_magres/parsers/utils/__init__.py @@ -16,5 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .utils import get_files, numpy_type_to_json_serializable, BeyondDFTWorkflowsParser -from .magres_workflow import NMRMagRes, NMRMagResMethod, NMRMagResResults +from .utils import get_files diff --git a/src/nomad_parser_magres/parsers/utils/utils.py b/src/nomad_parser_magres/parsers/utils/utils.py new file mode 100644 index 0000000..3dbd57d --- /dev/null +++ b/src/nomad_parser_magres/parsers/utils/utils.py @@ -0,0 +1,32 @@ +import os +from glob import glob + + +def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = True): + """Get files following the `pattern` with respect to the file `stripname` (usually this + being the mainfile of the given parser) up to / down from the `filepath` (`deep=True` going + down, `deep=False` up) + + Args: + pattern (str): targeted pattern to be found + filepath (str): filepath to start the search + stripname (str, optional): name with respect to which do the search. Defaults to ''. + deep (bool, optional): boolean setting the path in the folders to scan (down or up). Defaults to down=True. + + Returns: + list: List of found files. 
+ """ + for _ in range(10): + filenames = glob(f'{os.path.dirname(filepath)}/{pattern}') + pattern = os.path.join('**' if deep else '..', pattern) + if filenames: + break + + if len(filenames) > 1: + # filter files that match + suffix = os.path.basename(filepath).strip(stripname) + matches = [f for f in filenames if suffix in f] + filenames = matches if matches else filenames + + filenames = [f for f in filenames if os.access(f, os.F_OK)] + return filenames diff --git a/src/nomad_parser_magres/schema.py b/src/nomad_parser_magres/schema.py deleted file mode 100644 index 9838181..0000000 --- a/src/nomad_parser_magres/schema.py +++ /dev/null @@ -1,113 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import numpy as np # pylint: disable=unused-import -import typing # pylint: disable=unused-import -from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, - Category, - Package, - Quantity, - Section, - SubSection, - SectionProxy, - Reference, -) - -from runschema.calculation import ( - MagneticShielding as BaseMagneticShielding, - ElectricFieldGradient as BaseElectricFieldGradient, - SpinSpinCoupling as BaseSpinSpinCoupling, -) - - -m_package = Package() - - -class MagneticShielding(BaseMagneticShielding): - """ - Section extensions for the Run.Calculation.MagneticShielding base section. - """ - - # ! 
These quantities should be implemented in `BaseMagneticShielding` as refs to the specific `AtomsState` - - m_def = Section(extends_base_section=True) - - atoms = Quantity( - type=np.str_, - shape=['n_atoms', 2], - description=""" - Identifier for the atoms involved in the magnetic shielding tensor. This a list of - `n_atoms` pairs of strings [atom_label, atom_index]. The atom index corresponds to the position - on the list `System.atoms.labels`. - """, - ) - - -class ElectricFieldGradient(BaseElectricFieldGradient): - """ - Section extensions for the Run.Calculation.ElectricFieldGradient base section. - """ - - # ! These quantities should be implemented in `BaseElectricFieldGradient` as refs to the specific `AtomsState` - - m_def = Section(extends_base_section=True) - - atoms = Quantity( - type=np.str_, - shape=['n_atoms', 2], - description=""" - Identifier for the atoms involved in the electric field gradient tensor. This a list of - `n_atoms` pairs of strings [atom_label, atom_index]. The atom index corresponds to the position - on the list `System.atoms.labels`. - """, - ) - - -class SpinSpinCoupling(BaseSpinSpinCoupling): - """ - Section extensions for the Run.Calculation.SpinspinCoupling base section. - """ - - # ! These quantities should be implemented in `BaseSpinSpinCoupling` as refs to the specific `AtomsState`g` - - m_def = Section(extends_base_section=True) - - atoms_1 = Quantity( - type=np.str_, - shape=['n_atoms', 2], - description=""" - Identifier for the atoms involved in the spin-spin coupling J12 for the 1 atoms. This a list of - `n_atoms` pairs of strings [atom_label, atom_index]. The atom index corresponds to the position - on the list `System.atoms.labels`. - """, - ) - - atoms_2 = Quantity( - type=np.str_, - shape=['n_atoms', 2], - description=""" - Identifier for the atoms involved in the spin-spin coupling J12 for the 2 atoms. This a list of - `n_atoms` pairs of strings [atom_label, atom_index]. 
The atom index corresponds to the position - on the list `System.atoms.labels`. - """, - ) - - -m_package.__init_metainfo__() diff --git a/src/nomad_parser_magres/schema_packages/__init__.py b/src/nomad_parser_magres/schema_packages/__init__.py new file mode 100644 index 0000000..d3f9d37 --- /dev/null +++ b/src/nomad_parser_magres/schema_packages/__init__.py @@ -0,0 +1,17 @@ +from nomad.config.models.plugins import SchemaPackageEntryPoint +from pydantic import Field + + +class MagresSchemaPackageEntryPoint(SchemaPackageEntryPoint): + parameter: int = Field(0, description='Custom configuration parameter') + + def load(self): + from nomad_parser_magres.schema_packages.package import m_package + + return m_package + + +nomad_parser_magres_schema = MagresSchemaPackageEntryPoint( + name='MagresSchemaPackageEntryPoint', + description='Entry point for the magres code-specific schema.', +) diff --git a/src/nomad_parser_magres/schema_packages/ccpnc_metadata.py b/src/nomad_parser_magres/schema_packages/ccpnc_metadata.py new file mode 100644 index 0000000..544bfe5 --- /dev/null +++ b/src/nomad_parser_magres/schema_packages/ccpnc_metadata.py @@ -0,0 +1,124 @@ +import numpy as np +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import JSON, Quantity, SubSection + + +class MaterialProperties(ArchiveSection): + # Note from @JosePizarro3: note we have all these information somewhere else in the `nomad_simulations` schema. + # Nevertheless, if you feel it is better to keep these quantities here for clarity, it is totally fine for me. + chemical_name = Quantity( + type=str, + description=""" + Free-text chemical name assigned by users. + """, + ) + + chemical_name_tokens = Quantity( + type=str, + shape=['*'], + description=""" + Free-text chemical name, but tokenised to take individual words in the name to assist in wildcard searches. 
+ """, + ) + + formula = Quantity( + # type=[(str, int)], # better to use JSON type + type=JSON, + shape=['*'], + description=""" + Dictionary containing the species (chemical symbol of an element in the material) as keys and + number of atoms of that element in the material as their value. + """, + ) + + stoichiometry = Quantity( + type=JSON, + shape=['*'], + description=""" + Reduced proportion of materials details. + """, + ) + + elements_ratios = Quantity( + type=np.float64, + shape=['*'], + description=""" + Ratio of constituent elements (each element is a number between 0 and 1). + """, + ) + + # Note from @JosePizarro3: in the `nomad_simulations` schema we have a sub-section under `archive.data.model_system[*].chemical_formula` + # where we compiled a bunch of different formats. There, `chemical_formula.descriptive` is selected depending on the + # specific case (in organic and inorganic chemistry, the descriptive formula is different). + chemical_formula_descriptive = Quantity( + type=str, + description=""" + Formula as a string, e.g., 'C2H6O'. + """, + ) + + +class ORCID(ArchiveSection): + orcid_id = Quantity( + type=JSON, + shape=['*'], + description=""" + Dictionary containing the ORCID IDs of the author (keys) and uploader (values) profiles. + """, + ) + + +class CCPNCRecord(ArchiveSection): + visible = Quantity( + type=bool, + description=""" + A boolean value that indicates if the record is to be hidden or available to be returned when searched + """, + ) + + immutable_id = Quantity( + type=str, + description=""" + 7 digit unique record identifier. + """, + ) + + +class ExternalDatabaseReference(ArchiveSection): + external_database_name = Quantity( + type=str, + description=""" + External database name where additional information on the material exists + """, + ) + + external_database_reference_code = Quantity( + type=str, + description=""" + Specific database code pointing to the material or a polymorphic form of the material. 
+ """, + ) + + +class FreeTextMetadata(ArchiveSection): + uploader_author_notes = Quantity( + type=str, + description=""" + Additional metadata that authors want to indicate about the computation. + """, + ) + + structural_descriptor_notes = Quantity( + type=str, + description=""" + Additional notes specific to the polymorphic forms of the material. + """, + ) + + +class CCPNCMetadata(ArchiveSection): + material_properties = SubSection(section_def=MaterialProperties) + orcid = SubSection(section_def=ORCID) + ccpnc_record = SubSection(section_def=CCPNCRecord) + external_database_reference = SubSection(section_def=ExternalDatabaseReference) + free_text_metadata = SubSection(section_def=FreeTextMetadata) diff --git a/src/nomad_parser_magres/schema_packages/package.py b/src/nomad_parser_magres/schema_packages/package.py new file mode 100644 index 0000000..06bcc4b --- /dev/null +++ b/src/nomad_parser_magres/schema_packages/package.py @@ -0,0 +1,402 @@ +from typing import TYPE_CHECKING, Optional + +import numpy as np + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from nomad.metainfo import Context, Section + from structlog.stdlib import BoundLogger + +from nomad.config import config +from nomad.datamodel.metainfo.basesections import Entity +from nomad.metainfo import MEnum, Quantity, SchemaPackage, Section, SubSection +from nomad_simulations.schema_packages.atoms_state import AtomsState +from nomad_simulations.schema_packages.general import Simulation +from nomad_simulations.schema_packages.outputs import Outputs as BaseOutputs +from nomad_simulations.schema_packages.physical_property import PhysicalProperty + +# Import the CCPNCMetadata section (always use absolute paths for imports) +from nomad_parser_magres.schema_packages.ccpnc_metadata import CCPNCMetadata + +configuration = config.get_plugin_entry_point( + 'nomad_parser_magres.schema_packages:nomad_parser_magres_schema' +) + +m_package = SchemaPackage() + + +def 
resolve_name_from_entity_ref(entities: list[Entity], logger: 'BoundLogger') -> str: + """ + Resolves the `name` of the atom-resolved `PhysicalProperty` from the `entity_ref` by assigning + a label corresponding to the `AtomsState.chemical_symbol` and a number corresponding to the + position in the list of `AtomsState`. + + Args: + entities (list[Entity]): The list of entities to resolve the name from. + logger ('BoundLogger'): The logger to log messages. + + Returns: + (str): The resolved name of the atom-resolved `PhysicalProperty`. + """ + name = '' + for entity in entities: + atoms_state = entity + # Check if `entity_ref` exists and it is an AtomsState + if not atoms_state or not isinstance(atoms_state, AtomsState): + logger.error( + 'Could not find `entity_ref` referencing an `AtomsState` section.' + ) + return '' + # Check if the parent of `entity_ref` exists + cell = atoms_state.m_parent + if not cell: + logger.warning( + 'The parent of the `AtomsState` in `entity_ref` does not exist.' + ) + return '' + + index = '' # ! implement here if needed + name += f'{atoms_state.chemical_symbol}{index}' + return name + + +class MagneticShieldingIsotropic(PhysicalProperty): + """ + The isotropic part of the `MagneticShieldingTensor`. This is 1/3 of the trace of the magnetic + shielding tensor (see `extract_isotropic_part()` function in `MagneticShieldingTensor`). + + See, e.g, https://pubs.acs.org/doi/10.1021/cr300108a. + + This property will appear as a list under `Outputs` where each of the elements correspond to an atom in the unit cell + The specific atom is known by defining the reference to the specific `AtomsState` under `ModelSystem.cell.atoms_state` + using `entity_ref`. + """ + + value = Quantity( + type=np.float64, + unit='dimensionless', + description=""" + Value of the isotropic part of the magnetic shielding tensor per atom. 
+ """, + ) + + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve `name` to be from the `entity_ref` + self.name = resolve_name_from_entity_ref( + entities=[self.entity_ref], logger=logger + ) + + +class MagneticShieldingTensor(PhysicalProperty): + """ + Nuclear response of a material to shield the effects of an applied external field. This is a tensor 3x3 related with + the induced magnetic field as: + + B_induced = - magnetic_shielding * B_external + + See, e.g, https://pubs.acs.org/doi/10.1021/cr300108a. + + This property will appear as a list under `Outputs` where each of the elements correspond to an atom in the unit cell + The specific atom is known by defining the reference to the specific `AtomsState` under `ModelSystem.cell.atoms_state` + using `entity_ref`. + """ + + value = Quantity( + type=np.float64, + unit='dimensionless', + description=""" + Value of the magnetic shielding tensor per atom. + """, + ) + + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.rank = [3, 3] # ! move this to definitions + self.name = self.m_def.name + + def extract_isotropic_part( + self, logger: 'BoundLogger' + ) -> Optional[MagneticShieldingIsotropic]: + """ + Extract the isotropic part of the magnetic shielding tensor. This is 1/3 of the trace of the magnetic + shielding tensor `value`. + + Args: + logger ('BoundLogger'): The logger to log messages. + + Returns: + (Optional[MagneticShieldingIsotropic]): The isotropic part of the magnetic shielding tensor. 
+ """ + isotropic = MagneticShieldingIsotropic() + try: + isotropic.value = np.trace(self.value) / 3.0 + except Exception: + logger.warning('Could not extract the trace of the `value` tensor.') + return None + isotropic.physical_property_ref = self # derived quantity + return isotropic + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve `name` to be from the `entity_ref` + self.name = resolve_name_from_entity_ref( + entities=[self.entity_ref], logger=logger + ) + + # `MagneticShieldingIsotropic` extraction + isotropic = self.extract_isotropic_part(logger) + if isotropic is not None: + self.m_parent.magnetic_shieldings_isotropic.append(isotropic) + + +class ElectricFieldGradient(PhysicalProperty): + """ + Interaction between the quadrupole moment of the nucleus and the electric field gradient (EFG) + at the nucleus position generated by the surrounding charges. This property is relevant for + Nuclear Magnetic Resonance (NMR). The eigenvalues of these tensors can be used to compute + the `quadrupolar_coupling_constant` and the `asymmetry_parameter`. + + See, e.g, https://pubs.acs.org/doi/10.1021/cr300108a. + + This property will appear as a list under `Outputs` where each of the elements correspond to an atom in the unit cell + The specific atom is known by defining the reference to the specific `AtomsState` under `ModelSystem.cell.atoms_state` + using `entity_ref`. + """ + + type = Quantity( + type=MEnum('total', 'local', 'non_local'), + description=""" + Type of contribution to the electric field gradient (EFG). The total EFG can be decomposed + on the `local` and `non_local` contributions. + """, + ) + + value = Quantity( + type=np.float64, + unit='volt / meter ** 2', + description=""" + Value of the electric field gradient (EFG) for each `contribution` per unit area. 
+ """, + ) + + quadrupolar_coupling_constant = Quantity( + type=np.float64, + description=""" + Quadrupolar coupling constant for each atom in the unit cell. It is computed from + the eigenvalues of the EFG tensor as: + + quadrupolar_coupling_constant = efg_zz * e * Z / h + + where efg_zz is the largest eigenvalue of the EFG tensor, Z is the atomic number. + """, + ) + + asymmetry_parameter = Quantity( + type=np.float64, + description=""" + Asymmetry parameter for each atom in the unit cell. It is computed from the + eigenvalues of the EFG tensor as: + + asymmetry_parameter = (efg_xx - efg_yy) / efg_zz + + where efg_xx, efg_yy and efg_zz are the eigenvalues of the EFG tensor ordered + such that |efg_zz| > |efg_yy| > |efg_xx|. + """, + ) + + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.rank = [3, 3] # ! move this to definitions + + def resolve_quadrupolar_coupling_constant(self, logger: 'BoundLogger') -> None: + pass + + def resolve_asymmetry_parameter(self, logger: 'BoundLogger') -> None: + pass + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve `name` to be from the `entity_ref` + self.name = resolve_name_from_entity_ref( + entities=[self.entity_ref], logger=logger + ) + + # TODO add normalization to extract `quadrupolar_coupling_constant` and `asymmetry_parameter` + + +class SpinSpinCoupling(PhysicalProperty): + """ + Indirect exchanges or interactions between 2 nuclear spins that arises from hyperfine interactions between + the nuclei and local electrons. + + This property will appear as a list under `Outputs` where each of the elements correspond to an atom-atom + coupling term. The specific pair of atoms defined for the coupling is known by referencing the specific `AtomsState` + under `ModelSystem.cell.atoms_state` using `entity_ref_1` and `entity_ref_2`. 
+ + Synonyms: + - IndirectSpinSpinCoupling + """ + + # TODO extend this to other spin-spin coupling types besides indirect (which is useful in NMR) + + # we hide `entity_ref` from `PhysicalProperty` to avoid confusion + m_def = Section(a_eln={'hide': ['entity_ref']}) + + type = Quantity( + type=MEnum( + 'total', + 'direct_dipolar', + 'fermi_contact', + 'orbital_diamagnetic', + 'orbital_paramagnetic', + 'spin_dipolar', + ), + description=""" + Type of contribution to the indirect spin-spin coupling. The total indirect spin-spin + coupling is composed of: + + `total` = `direct_dipolar` + J_coupling + + Where the J_coupling is: + J_coupling = `fermi_contact` + + `spin_dipolar` + + `orbital_diamagnetic` + + `orbital_paramagnetic` + + See https://pubs.acs.org/doi/full/10.1021/cr300108a. + """, + ) + + value = Quantity( + type=np.float64, + unit='joule', + description=""" + Value of the indirect spin-spin couplings for each contribution. + """, + ) + + reduced_value = Quantity( + type=np.float64, + unit='kelvin**2 / joule', + shape=[3, 3], # dynamical shape only works for `PhysicalProperty.value` + description=""" + Reduced value of the indirect spin-spin couplings for each contribution. It relates with the + normal value as: + + reduced_value = value / (gyromagnetic_ratio_i * gyromagnetic_ratio_j * 2 * np.pi * hbar) + + where i, j runs for each atom in the unit cell. + """, + ) + + entity_ref_1 = Quantity( + type=Entity, + description=""" + Reference to the first entity that the coupling refers to. In this case, this is the + first `AtomsState` in the pair of atoms that the coupling refers to. + """, + ) + + entity_ref_2 = Quantity( + type=Entity, + description=""" + Reference to the second entity that the coupling refers to. In this case, this is the + second `AtomsState` in the pair of atoms that the coupling refers to. 
+ """, + ) + + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.rank = [3, 3] # ! move this to definitions + + def resolve_reduced_value(self, logger: 'BoundLogger') -> None: + pass + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve `name` to be from the `entity_ref` + self.name = resolve_name_from_entity_ref( + entities=[self.entity_ref_1, self.entity_ref_2], logger=logger + ) + + # TODO add normalization to extract `value` from `reduced_value` + # TODO add normalization to extract `reduced_value` from `value` + + +class MagneticSusceptibility(PhysicalProperty): + """ + Section containing the information of magnetic susceptibility tensor. Degree of + magnetization of a material in the presence of a magnetic field. + """ + + # TODO currently only the macroscopic quantity is being supported + + m_def = Section(validate=False) + + scale_dimension = Quantity( + type=MEnum('microscopic', 'macroscopic'), + description=""" + Identifier of the scale dimension of the magnetic susceptibility tensor. + """, + ) + + value = Quantity( # TODO extend this to microscopic contributions + type=np.float64, + unit='dimensionless', + description=""" + Value of the magnetic susceptibility tensor. + """, + ) + + def __init__( + self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs + ) -> None: + super().__init__(m_def, m_context, **kwargs) + self.rank = [3, 3] # ! move this to definitions + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + +class Outputs(BaseOutputs): + """ + The outputs of the magres file format. 
+ """ + + magnetic_shieldings = SubSection( + sub_section=MagneticShieldingTensor.m_def, repeats=True + ) + magnetic_shieldings_isotropic = SubSection( + sub_section=MagneticShieldingIsotropic.m_def, repeats=True + ) + electric_field_gradients = SubSection( + sub_section=ElectricFieldGradient.m_def, repeats=True + ) + spin_spin_couplings = SubSection(sub_section=SpinSpinCoupling.m_def, repeats=True) + magnetic_susceptibilities = SubSection( + sub_section=MagneticSusceptibility.m_def, repeats=True + ) + + +# Define the CCPNCSimulation class holding CCP-NC specific metadata +class CCPNCSimulation(Simulation): + ccpnc_metadata = SubSection(section_def=CCPNCMetadata) + + +m_package.__init_metainfo__() diff --git a/src/nomad_parser_magres/utils/magres_workflow.py b/src/nomad_parser_magres/schema_packages/workflow.py similarity index 53% rename from src/nomad_parser_magres/utils/magres_workflow.py rename to src/nomad_parser_magres/schema_packages/workflow.py index 2075110..a9004cc 100644 --- a/src/nomad_parser_magres/utils/magres_workflow.py +++ b/src/nomad_parser_magres/schema_packages/workflow.py @@ -1,27 +1,9 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from nomad.metainfo import SubSection, Quantity, Reference -from nomad.datamodel.metainfo.simulation.method import Method +from nomad.metainfo import Quantity, Reference, SubSection +from nomad_simulations.schema_packages.model_method import ModelMethod from simulationworkflowschema import ( - SimulationWorkflowResults, - SimulationWorkflowMethod, SerialSimulation, + SimulationWorkflowMethod, + SimulationWorkflowResults, ) @@ -39,7 +21,7 @@ class NMRMagResMethod(SimulationWorkflowMethod): """ nmr_method_ref = Quantity( - type=Reference(Method), + type=Reference(ModelMethod), description=""" Reference to the NMR (first principles) methodology. """, diff --git a/src/nomad_parser_magres/utils/utils.py b/src/nomad_parser_magres/utils/utils.py deleted file mode 100644 index 4e2a14f..0000000 --- a/src/nomad_parser_magres/utils/utils.py +++ /dev/null @@ -1,635 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import numpy as np -import os -from glob import glob - -from typing import Union -from nomad.utils import extract_section -from nomad.datamodel import EntryArchive -from runschema.run import Run -from nomad.datamodel.metainfo.workflow import Link, TaskReference -from simulationworkflowschema import ( - DFTPlusGW, - DFTPlusGWMethod, - DFTPlusTBPlusDMFT, - DFTPlusTBPlusDMFTMethod, - XS, - XSMethod, - FirstPrinciplesPlusTB, - FirstPrinciplesPlusTBMethod, - DMFTPlusMaxEnt, - DMFTPlusMaxEntMethod, - PhotonPolarization, - PhotonPolarizationMethod, - PhotonPolarizationResults, -) - -# Special file-format workflow definition -from .magres_workflow import ( - NMRMagRes, - NMRMagResMethod, - NMRMagResResults, -) - - -def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = True): - """Get files following the `pattern` with respect to the file `stripname` (usually this - being the mainfile of the given parser) up to / down from the `filepath` (`deep=True` going - down, `deep=False` up) - - Args: - pattern (str): targeted pattern to be found - filepath (str): filepath to start the search - stripname (str, optional): name with respect to which do the search. Defaults to ''. - deep (bool, optional): boolean setting the path in the folders to scan (down or up). Defaults to down=True. - - Returns: - list: List of found files. 
- """ - for _ in range(10): - filenames = glob(f'{os.path.dirname(filepath)}/{pattern}') - pattern = os.path.join('**' if deep else '..', pattern) - if filenames: - break - - if len(filenames) > 1: - # filter files that match - suffix = os.path.basename(filepath).strip(stripname) - matches = [f for f in filenames if suffix in f] - filenames = matches if matches else filenames - - filenames = [f for f in filenames if os.access(f, os.F_OK)] - return filenames - - -def numpy_type_to_json_serializable( - quantity: Union[np.bool_, np.int32, np.int64, np.float64], -): - """Converts numpy data types to native Python types suitable for JSON serialization. - - Args: - quantity (Union[np.bool_, np.int32, np.int64, np.float64]): The numpy data type - value to be converted. - - Returns: - Union[bool, int, float]: The converted native Python type. - """ - if isinstance(quantity, np.bool_): - return bool(quantity) - if isinstance(quantity, (np.int32, np.int64)): - return int(quantity) - if isinstance(quantity, np.float64): - return float(quantity) - - -class BeyondDFTWorkflowsParser: - """ - Generates automatic beyondDFT (GW, BSE, DMFT) workflows. Main classes for parsers will - inherit from here if some automatic workflow parsing has been implemented. - """ - - def __init__( - self, - archive: EntryArchive, - _child_archives: dict, - _xs_spectra_types: list, - logger, - ): - self.archive = archive - self._child_archives = _child_archives - self._xs_spectra_types = _xs_spectra_types - - def run_workflow_archive(self, workflow_archive: EntryArchive): - """Initializes the workflow archive by checking if Run exists or not, as well as - copying Program and System into it. 
- - Args: - workflow_archive (EntryArchive): the workflow archive targeted for initialization - """ - if workflow_archive.run: - sec_run = workflow_archive.run[-1] - else: - sec_run = Run() - workflow_archive.run.append(sec_run) - sec_run.program = self.archive.run[-1].program - - def parse_gw_workflow( - self, gw_archive: EntryArchive, gw_workflow_archive: EntryArchive - ): - """Automatically parses the GW workflow. Here, `self.archive` is the DFT archive. - - Args: - gw_archive (EntryArchive): the GW archive - gw_workflow_archive (EntryArchive): the GW workflow archive - """ - self.run_workflow_archive(gw_workflow_archive) - gw_workflow_archive.run[-1].m_add_sub_section( - Run.system, self.archive.run[-1].system[-1] - ) - - workflow = DFTPlusGW(method=DFTPlusGWMethod()) - - # Method - method_gw = extract_section(gw_archive, ['run', 'method', 'gw']) - method_xcfunctional = extract_section( - self.archive, ['run', 'method', 'dft', 'xc_functional'] - ) - method_basisset = extract_section( - self.archive, ['run', 'method', 'electrons_representation'] - ) - workflow.method.gw_method_ref = method_gw - workflow.method.starting_point = method_xcfunctional - workflow.method.electrons_representation = method_basisset - - # Inputs and Outputs - input_structure = extract_section(self.archive, ['run', 'system']) - dft_calculation = extract_section(self.archive, ['run', 'calculation']) - gw_calculation = extract_section(gw_archive, ['run', 'calculation']) - if input_structure: - workflow.m_add_sub_section( - DFTPlusGW.inputs, Link(name='Input structure', section=input_structure) - ) - if gw_calculation: - workflow.m_add_sub_section( - DFTPlusGW.outputs, - Link(name='Output GW calculation', section=gw_calculation), - ) - - # DFT task - if self.archive.workflow2: - task = TaskReference(task=self.archive.workflow2) - task.name = 'DFT' - # TODO check why this re-writting is necessary to not repeat sections inside tasks - if input_structure: - task.inputs = [Link(name='Input 
structure', section=input_structure)] - if dft_calculation: - task.outputs = [ - Link(name='Output DFT calculation', section=dft_calculation) - ] - workflow.m_add_sub_section(DFTPlusGW.tasks, task) - - # GW task - if gw_archive.workflow2: - task = TaskReference(task=gw_archive.workflow2) - task.name = 'GW' - if dft_calculation: - task.inputs = [ - Link(name='Output DFT calculation', section=dft_calculation) - ] - if gw_calculation: - task.outputs = [ - Link(name='Output GW calculation', section=gw_calculation) - ] - workflow.m_add_sub_section(DFTPlusGW.tasks, task) - - gw_workflow_archive.workflow2 = workflow - - def parse_tb_workflow( - self, - tb_archive: EntryArchive, - first_principles_calculation_archive: EntryArchive, - tb_workflow_archive: EntryArchive, - ): - """Automatically parses the TB workflow. Here, `self.archive` is the DFT archive. - - Args: - tb_archive (EntryArchive): the Tight-Binding archive - first_principles_calculation_archive (EntryArchive): the first-principles-calculation archive - tb_workflow_archive (EntryArchive): the Tight-Binding workflow archive - """ - self.run_workflow_archive(tb_workflow_archive) - tb_workflow_archive.run[-1].m_add_sub_section( - Run.system, first_principles_calculation_archive.run[-1].system[-1] - ) - workflow = FirstPrinciplesPlusTB(method=FirstPrinciplesPlusTBMethod()) - - # Method - method_first_principles = extract_section( - first_principles_calculation_archive, ['run', 'method'] - ) - method_tb = extract_section(tb_archive, ['run', 'method', 'tb']) - workflow.method.first_principles_method_ref = method_first_principles - workflow.method.tb_method_ref = method_tb - - # Inputs and Outputs - input_structure = extract_section( - first_principles_calculation_archive, ['run', 'system'] - ) - first_principles_calculation = extract_section( - first_principles_calculation_archive, ['run', 'calculation'] - ) - tb_calculation = extract_section(tb_archive, ['run', 'calculation']) - if input_structure: - 
workflow.m_add_sub_section( - FirstPrinciplesPlusTB.inputs, - Link(name='Input Structure', section=input_structure), - ) - if tb_calculation: - workflow.m_add_sub_section( - FirstPrinciplesPlusTB.outputs, - Link(name='Output TB Model', section=tb_calculation), - ) - - # First Principles Calculation task - if self.archive.workflow2: - first_principles_task = TaskReference( - task=first_principles_calculation_archive.workflow2 - ) - first_principles_task.name = 'FirstPrinciples' - if input_structure: - first_principles_task.inputs = [ - Link(name='Input Structure', section=input_structure) - ] - if first_principles_calculation: - first_principles_task.outputs = [ - Link( - name='Output FirstPrinciples Calculation', - section=first_principles_calculation, - ) - ] - workflow.m_add_sub_section( - FirstPrinciplesPlusTB.tasks, first_principles_task - ) - - # TB task - if tb_archive.workflow2: - tb_task = TaskReference(task=tb_archive.workflow2) - tb_task.name = 'TB' - if first_principles_calculation: - tb_task.inputs = [ - Link( - name='Input FirstPrinciples Calculation', - section=first_principles_calculation, - ) - ] - if tb_calculation: - tb_task.outputs = [Link(name='Output TB Model', section=tb_calculation)] - workflow.m_add_sub_section(FirstPrinciplesPlusTB.tasks, tb_task) - - tb_workflow_archive.workflow2 = workflow - - def parse_photon_workflow(self): - """Automatically parses the PhotonPolarization workflow. Here, `self.archive` is - the photon_workflow archive, and `self._child_archives` the archives for SinglePoint - photons. 
- """ - workflow = PhotonPolarization( - method=PhotonPolarizationMethod(), results=PhotonPolarizationResults() - ) - workflow.name = 'BSE' # this entry contains the full BSE calculation for all photon polarizations - - # Method - method_bse = extract_section(self.archive, ['run', 'method', 'bse']) - workflow.method.bse_method_ref = method_bse - - # Inputs - input_structure = extract_section(self.archive, ['run', 'system']) - workflow.m_add_sub_section( - PhotonPolarization.inputs, - Link(name='Input structure', section=input_structure), - ) - input_method = extract_section(self.archive, ['run', 'method']) - workflow.m_add_sub_section( - PhotonPolarization.inputs, - Link(name='Input BSE methodology', section=input_method), - ) - - # Outputs - spectra = [] - for index, path in enumerate(self._child_archives.keys()): - archive = self._child_archives.get(path) - - output_polarization = extract_section(archive, ['run', 'calculation']) - if output_polarization: - workflow.m_add_sub_section( - PhotonPolarization.outputs, - Link( - name=f'Output polarization {index + 1}', - section=output_polarization, - ), - ) - spectra.append(output_polarization.spectra[0]) - - # Tasks - if archive.workflow2: - task = TaskReference(task=archive.workflow2) - task.name = f'Photon {index + 1}' - input_photon_method = archive.run[-1].method[0] - if input_photon_method and input_structure: - task.inputs = [ - Link(name='Input structure', section=input_structure), - Link( - name='Input photon parameters', section=input_photon_method - ), - ] - if output_polarization: - task.outputs = [ - Link( - name=f'Output polarization {index + 1}', - section=output_polarization, - ) - ] - workflow.m_add_sub_section(PhotonPolarization.tasks, task) - - # Results - workflow.results.n_polarizations = len(spectra) - workflow.results.spectrum_polarization = spectra - - self.archive.workflow2 = workflow - - def parse_xs_workflow( - self, xs_archives: EntryArchive, xs_workflow_archive: EntryArchive - ): - 
"""Automatically parses the XS workflow. Here, `self.archive` is the DFT archive. - - Args: - xs_archives (EntryArchive): the XS archive - xs_workflow_archive (EntryArchive): the XS workflow archive - """ - self.run_workflow_archive(xs_workflow_archive) - xs_workflow_archive.run[-1].m_add_sub_section( - Run.system, self.archive.run[-1].system[-1] - ) - - def extract_polarization_outputs(): - output = [] - index = 0 - for path, archive in self._child_archives.items(): - if os.path.basename(path).split('_')[0] in self._xs_spectra_types: - output_polarization = archive.run[-1].calculation[-1] - output.append( - Link( - name=f'Output polarization {index + 1}', - section=output_polarization, - ) - ) - index += 1 - return output - - workflow = XS(method=XSMethod()) - workflow.name = 'XS' - - # Inputs and Outputs - input_structure = extract_section(self.archive, ['run', 'system']) - dft_calculation = extract_section(self.archive, ['run', 'calculation']) - polarization_calculations = extract_polarization_outputs() - if input_structure: - workflow.m_add_sub_section( - XS.inputs, Link(name='Input structure', section=input_structure) - ) - for index, polarizations in enumerate(polarization_calculations): - workflow.m_add_sub_section( - XS.outputs, - Link(name=f'Polarization {index + 1}', section=polarizations), - ) - - # DFT task - if self.archive.workflow2: - task = TaskReference(task=self.archive.workflow2) - task.name = 'DFT' - if input_structure: - task.inputs = [Link(name='Input structure', section=input_structure)] - if dft_calculation: - task.outputs = [ - Link(name='Output DFT calculation', section=dft_calculation) - ] - workflow.m_add_sub_section(XS.tasks, task) - - # Spectra task - for index, xs_archive in enumerate(xs_archives): - if not xs_archive.workflow2: - continue - task = TaskReference(task=xs_archive.workflow2) - task.name = f'BSE {index + 1}' - if dft_calculation: - xs_archive.workflow2.m_add_sub_section( - PhotonPolarization.inputs, - Link(name='Output 
DFT calculation', section=dft_calculation), - ) - task.inputs = [ - Link(name='Output DFT calculation', section=dft_calculation) - ] - for i_photon, photon_task in enumerate(xs_archive.workflow2.tasks): - photon_task.m_add_sub_section( - TaskReference.inputs, - Link(name='Output DFT calculation', section=dft_calculation), - ) - if photon_task.m_xpath('outputs[-1].section'): - task.m_add_sub_section( - TaskReference.outputs, - Link( - name=f'Polarization {i_photon + 1}', - section=photon_task.outputs[-1].section, - ), - ) - workflow.m_add_sub_section(XS.tasks, task) - - xs_workflow_archive.workflow2 = workflow - - def parse_dmft_maxent_workflow( - self, maxent_archive: EntryArchive, workflow_archive: EntryArchive - ): - """Automatically parses the DMFT+MaxEnt workflow. Here, `self.archive` is the DMFT archive. - - Args: - maxent_archive (EntryArchive): the MaxEnt archive - workflow_archive (EntryArchive): the DMFT+MaxEnt workflow archive - """ - - workflow = DMFTPlusMaxEnt(method=DMFTPlusMaxEntMethod()) - - # Method - method_dmft = extract_section(self.archive, ['run', 'method', 'dmft']) - method_maxent = extract_section(maxent_archive, ['run', 'method']) - workflow.method.dmft_method_ref = method_dmft - workflow.method.maxent_method_ref = method_maxent - - # Inputs and Outputs - input_structure = extract_section(self.archive, ['run', 'system']) - dmft_calculation = extract_section(self.archive, ['run', 'calculation']) - maxent_calculation = extract_section(maxent_archive, ['run', 'calculation']) - workflow_maxent_calculation = extract_section( - workflow_archive, ['run', 'calculation'] - ) - if input_structure: - workflow.m_add_sub_section( - DMFTPlusMaxEnt.inputs, - Link(name='Input structure', section=input_structure), - ) - if maxent_calculation and workflow_maxent_calculation: - outputs = [ - Link( - name='Output MaxEnt Sigma calculation', section=maxent_calculation - ), - Link( - name='Output MaxEnt GF and DOS calculation', - 
section=workflow_maxent_calculation, - ), - ] - workflow.outputs = outputs - - # DMFT task - if self.archive.workflow2: - task = TaskReference(task=self.archive.workflow2) - task.name = 'DMFT' - if input_structure: - task.inputs = [Link(name='Input structure', section=input_structure)] - if dmft_calculation: - task.outputs = [ - Link(name='Output DMFT calculation', section=dmft_calculation) - ] - workflow.m_add_sub_section(DMFTPlusMaxEnt.tasks, task) - - # MaxEnt task - if maxent_archive.workflow2: - task = TaskReference(task=maxent_archive.workflow2) - task.name = 'MaxEnt' - if dmft_calculation: - task.inputs = [ - Link(name='Output DMFT calculation', section=dmft_calculation) - ] - if maxent_calculation: - task.outputs = [ - Link( - name='Output MaxEnt Sigma calculation', - section=maxent_calculation, - ) - ] - workflow.m_add_sub_section(DMFTPlusMaxEnt.tasks, task) - - workflow_archive.workflow2 = workflow - - def parse_dmft_workflow( - self, wannier_archive: EntryArchive, dmft_workflow_archive: EntryArchive - ): - # TODO extend for DFT tasks - self.run_workflow_archive(dmft_workflow_archive) - # Check if system exists in the DMFT archive or not, and whether it exists on the - # Wannier90 archive or not, and then add it. 
- try: - sec_system = self.archive.run[-1].system[-1] - dmft_workflow_archive.run[-1].m_add_sub_section(Run.system, sec_system) - except Exception: - if wannier_archive.run[-1].system[-1]: - sec_system = wannier_archive.run[-1].system[-1] - self.archive.run[-1].m_add_sub_section(Run.system, sec_system) - dmft_workflow_archive.run[-1].m_add_sub_section(Run.system, sec_system) - - workflow = DFTPlusTBPlusDMFT(method=DFTPlusTBPlusDMFTMethod()) - - # Method - method_proj = extract_section(wannier_archive, ['run', 'method', 'tb']) - method_dmft = extract_section(self.archive, ['run', 'method', 'dmft']) - workflow.method.tb_method_ref = method_proj - workflow.method.dmft_method_ref = method_dmft - - # Inputs and Outputs - input_structure = extract_section(wannier_archive, ['run', 'system']) - wannier_calculation = extract_section(wannier_archive, ['run', 'calculation']) - dmft_calculation = extract_section(self.archive, ['run', 'calculation']) - if input_structure: - workflow.m_add_sub_section( - DFTPlusTBPlusDMFT.inputs, - Link(name='Input structure', section=input_structure), - ) - if dmft_calculation: - workflow.m_add_sub_section( - DFTPlusTBPlusDMFT.outputs, - Link(name='Output DMFT calculation', section=dmft_calculation), - ) - - # Wannier90 task - if wannier_archive.workflow2: - task = TaskReference(task=wannier_archive.workflow2) - task.name = 'TB' - # TODO check why this re-writting is necessary to not repeat sections inside tasks - if input_structure: - task.inputs = [Link(name='Input structure', section=input_structure)] - if wannier_calculation: - task.outputs = [ - Link(name='Output TB calculation', section=wannier_calculation) - ] - workflow.m_add_sub_section(DFTPlusTBPlusDMFT.tasks, task) - - # DMFT task - if self.archive.workflow2: - task = TaskReference(task=self.archive.workflow2) - task.name = 'DMFT' - if wannier_calculation: - task.inputs = [ - Link(name='Output TB calculation', section=wannier_calculation) - ] - if dmft_calculation: - task.outputs = [ 
- Link(name='Output DMFT calculation', section=dmft_calculation) - ] - workflow.m_add_sub_section(DFTPlusTBPlusDMFT.tasks, task) - - dmft_workflow_archive.workflow2 = workflow - - def parse_nmr_magres_file_format(self, nmr_first_principles_archive: EntryArchive): - """ - Automatically parses the NMR Magres workflow. Here, `self.archive` is the - NMR magres archive in which we will link the original NMR first principles (CASTEP - or QuantumEspresso-GIPAW) - - Args: - nmr_first_principles_archive (EntryArchive): the NMR (first principles) - SinglePoint archive - """ - workflow = NMRMagRes(method=NMRMagResMethod(), results=NMRMagResResults()) - workflow.name = 'NMR MagRes' - - # Method - method_nmr = extract_section(nmr_first_principles_archive, ['run', 'method']) - workflow.method.nmr_method_ref = method_nmr - - # Inputs and Outputs - input_structure = extract_section( - nmr_first_principles_archive, ['run', 'system'] - ) - nmr_magres_calculation = extract_section(self.archive, ['run', 'calculation']) - if input_structure: - workflow.m_add_sub_section( - NMRMagRes.inputs, Link(name='Input structure', section=input_structure) - ) - if nmr_magres_calculation: - workflow.m_add_sub_section( - NMRMagRes.outputs, - Link(name='Output NMR calculation', section=nmr_magres_calculation), - ) - - # NMR (first principles) task - if nmr_first_principles_archive.workflow2: - task = TaskReference(task=nmr_first_principles_archive.workflow2) - task.name = 'NMR FirstPrinciples' - if input_structure: - task.inputs = [Link(name='Input structure', section=input_structure)] - if nmr_magres_calculation: - task.outputs = [ - Link( - name='Output NMR calculation', - section=nmr_magres_calculation, - ) - ] - workflow.m_add_sub_section(NMRMagRes.tasks, task) - - self.archive.workflow2 = workflow diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..5cdfd19 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +from nomad import utils + +logger = 
utils.get_logger(__name__) diff --git a/tests/test_parser.py b/tests/test_parser.py index b8fb828..bb63cd2 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,27 +1,12 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest -import numpy as np import os +import numpy as np +import pytest from nomad.datamodel import EntryArchive -from nomad_parser_magres.parser import MagresParser + +from nomad_parser_magres.parsers.parser import MagresParser + +from . 
import logger def approx(value, abs=0, rel=1e-6): @@ -36,50 +21,87 @@ def parser(): def test_single_point_ethanol(parser): archive = EntryArchive() parser.parse( - os.path.join(os.path.dirname(__file__), 'data/ethanol_nmr.magres'), + os.path.join('tests', 'data', 'ethanol_nmr.magres'), archive, - None, + logger, ) - sec_run = archive.run[-1] + simulation = archive.data - # Program testing - assert sec_run.program.name == 'CASTEP' - assert sec_run.program.version == '24.1' + # Program + assert simulation.program.name == 'CASTEP' + assert simulation.program.version == '24.1' - # System testing - assert len(sec_run.system) == 1 - sec_system = sec_run.system[-1] - assert sec_system.atoms.labels == ['H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'O'] - assert sec_system.atoms.positions[0][1].magnitude == approx(4.3583475749937473e-10) + # ModelSystem + assert len(simulation.model_system) == 1 + model_system = simulation.model_system[0] + assert model_system.is_representative + # Cell + assert len(model_system.cell) == 1 + atomic_cell = model_system.cell[0] + assert np.isclose( + atomic_cell.positions[3].to('angstrom').magnitude, + np.array([3.57828732, 5.39462129, 5.22149125]), + ).all() + assert np.isclose( + atomic_cell.lattice_vectors.to('angstrom').magnitude, + np.array( + [ + [5.29177211e00, 0.00000000e00, 0.00000000e00], + [3.24027589e-16, 5.29177211e00, 0.00000000e00], + [3.24027589e-16, 3.24027589e-16, 5.29177211e00], + ] + ), + ).all() + assert atomic_cell.periodic_boundary_conditions == [True, True, True] + # AtomsState + assert len(atomic_cell.atoms_state) == 9 + labels = ['H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'O'] + for index, symbol in enumerate(labels): + assert atomic_cell.atoms_state[index].chemical_symbol == symbol - # Method testing - assert len(sec_run.method) == 1 - sec_method = sec_run.method[-1] - assert sec_method.label == 'NMR' - assert sec_method.dft.xc_functional.exchange[-1].name == 'LDA_X_PZ' - assert 
sec_method.dft.xc_functional.correlation[-1].name == 'LDA_C_PZ' - assert (sec_method.k_mesh.grid == np.array([1, 1, 1])).all() - assert sec_method.electrons_representation[-1].type == 'plane waves' + # ModelMethod + assert len(simulation.model_method) == 1 + assert simulation.model_method[0].m_def.name == 'DFT' + assert simulation.model_method[0].name == 'NMR' + dft = simulation.model_method[0] + assert len(dft.xc_functionals) == 2 + assert dft.xc_functionals[0].name == 'correlation' + assert dft.xc_functionals[0].libxc_name == 'LDA_C_PZ' + assert dft.xc_functionals[1].name == 'exchange' + assert dft.xc_functionals[1].libxc_name == 'LDA_X_PZ' + # NumericalSettings + assert len(dft.numerical_settings) == 1 + assert dft.numerical_settings[0].m_def.name == 'KSpace' + k_space = dft.numerical_settings[0] + # KMesh + assert len(k_space.k_mesh) == 1 + assert (k_space.k_mesh[0].grid == [1, 1, 1]).all() + assert (k_space.k_mesh[0].offset == [0.25, 0.25, 0.25]).all() - # Calculation testing - assert len(sec_run.calculation) == 1 - sec_calc = sec_run.calculation[-1] - assert sec_calc.system_ref == sec_system - assert sec_calc.method_ref == sec_method - assert sec_calc.magnetic_shielding and sec_calc.electric_field_gradient - assert not sec_calc.spin_spin_coupling and not sec_calc.magnetic_susceptibility - # Magnetic shielding testing - assert len(sec_calc.magnetic_shielding) == 1 - sec_ms = sec_calc.magnetic_shielding[-1] - assert sec_ms.atoms.shape == (9, 2) - assert (sec_ms.atoms[3] == ['H', '4']).all() - assert sec_ms.value.shape == (9, 3, 3) - assert sec_ms.value[4][2][1] == approx(-8.661757088509511e-06) - assert sec_ms.isotropic_value.shape == (9,) - assert sec_ms.isotropic_value[4] == approx(3.035708828276491e-05) - # Electric field gradient testing - assert len(sec_calc.electric_field_gradient) == 1 - sec_efg = sec_calc.electric_field_gradient[-1] - assert sec_efg.contribution == 'total' - assert sec_efg.value.shape == sec_ms.value.shape - assert 
sec_efg.value[4][2][1].magnitude == approx(-3.0317252106856217e21) + # Outputs + assert len(simulation.outputs) == 1 + output = simulation.outputs[0] + assert output.model_system_ref == model_system + assert output.model_method_ref == dft + # Properties + assert output.m_xpath('magnetic_shieldings', dict=False) is not None + assert len(output.m_xpath('magnetic_shieldings', dict=False)) == 9 # per atom + for property_name in [ + 'electric_field_gradients', + 'spin_spin_couplings', + 'magnetic_susceptibilities', + ]: + assert output.m_xpath(property_name, dict=False) is None + # MagneticShieldingTensor + for i, ms in enumerate(output.magnetic_shieldings): + assert ms.entity_ref.chemical_symbol == labels[i] + assert np.isclose( + output.magnetic_shieldings[3].value.magnitude, + np.array( + [ + [3.15771355e-05, -5.88661144e-07, 1.53864065e-06], + [-4.68026860e-07, 2.06392827e-05, 2.43151206e-06], + [7.98507383e-08, 9.14578022e-07, 2.48414650e-05], + ] + ), + ).all()