From 35a7957f98cf4e0a1d31b7b04a5b791c693cd5e2 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Mon, 23 Sep 2024 10:20:42 +0200 Subject: [PATCH] Added new workflow from nomad_simulations and changed pyproject toml Deleted copyright comment at the beginning of the files Defined workflow_dft_files in Wannier90Parser --- pyproject.toml | 3 +- .../parsers/band_parser.py | 19 - .../parsers/dos_parser.py | 19 - .../parsers/hr_parser.py | 19 - src/nomad_parser_wannier90/parsers/parser.py | 110 +++--- .../parsers/utils/utils.py | 3 +- .../parsers/win_parser.py | 19 - .../schema_packages/package.py | 328 +----------------- tests/__init__.py | 18 - tests/test_parser.py | 19 +- 10 files changed, 69 insertions(+), 488 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aad86e6..ea65554 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ maintainers = [ license = { file = "LICENSE" } dependencies = [ "nomad-lab>=1.3.0", - "nomad-simulations>=0.0.3", + "nomad-simulations@git+https://github.com/nomad-coe/nomad-simulations.git@64ae0efb323cf17b20651f9a9a50864832db9a71", ] [project.urls] @@ -40,7 +40,6 @@ dev = [ "pytest-timeout", "pytest-cov", "structlog", - "nomad-lab[infrastructure]", # for search and MetadataRequired to work ] [tool.ruff] diff --git a/src/nomad_parser_wannier90/parsers/band_parser.py b/src/nomad_parser_wannier90/parsers/band_parser.py index b7f5477..2866078 100644 --- a/src/nomad_parser_wannier90/parsers/band_parser.py +++ b/src/nomad_parser_wannier90/parsers/band_parser.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: diff --git a/src/nomad_parser_wannier90/parsers/dos_parser.py b/src/nomad_parser_wannier90/parsers/dos_parser.py index 2baa43a..ea75af7 100644 --- a/src/nomad_parser_wannier90/parsers/dos_parser.py +++ b/src/nomad_parser_wannier90/parsers/dos_parser.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import numpy as np diff --git a/src/nomad_parser_wannier90/parsers/hr_parser.py b/src/nomad_parser_wannier90/parsers/hr_parser.py index e084eda..172d19f 100644 --- a/src/nomad_parser_wannier90/parsers/hr_parser.py +++ b/src/nomad_parser_wannier90/parsers/hr_parser.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: diff --git a/src/nomad_parser_wannier90/parsers/parser.py b/src/nomad_parser_wannier90/parsers/parser.py index c252a5d..0d6b9af 100644 --- a/src/nomad_parser_wannier90/parsers/parser.py +++ b/src/nomad_parser_wannier90/parsers/parser.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - import os from typing import TYPE_CHECKING, Optional @@ -49,6 +30,7 @@ KMesh as ModelKMesh, ) from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import SinglePoint # from nomad_simulations.schema_packages.utils import check_simulation_cell from nomad_parser_wannier90.parsers.band_parser import Wannier90BandParser @@ -56,7 +38,6 @@ from nomad_parser_wannier90.parsers.hr_parser import Wannier90HrParser from nomad_parser_wannier90.parsers.utils import get_files, parse_dft_plus_tb_workflow from nomad_parser_wannier90.parsers.win_parser import Wannier90WInParser -from nomad_parser_wannier90.schema_packages.package import SinglePoint re_n = r'[\n\r]' @@ -449,16 +430,51 @@ def parse_outputs(self, simulation: Simulation, logger: 'BoundLogger') -> Output return outputs + def workflow_dft_files(self, **kwargs) -> list[str]: + """ + Check if in the upload of the Wannier90 mainfile, there are the corresponding DFT files to create the + DFT+TB workflow, and returns them if so. Implemented only for VASP DFT files. + + Returns: + list[str]: Returns a list containing the DFT files if they are present, otherwise an empty list. + """ + # Wannier90 file + wannier90_mainfile = kwargs.get('filename') + wannier90_basename = os.path.basename(wannier90_mainfile) + + # DFT files + dft_files = [] + + # VASP DFT files + vasprun_files = get_files( + pattern='*vasprun.xml', + filepath=wannier90_mainfile, + stripname=wannier90_basename, + deep=False, + ) + outcar_files = get_files( + pattern='*OUTCAR', + filepath=wannier90_mainfile, + stripname=wannier90_basename, + deep=False, + ) + if not vasprun_files: + dft_files = outcar_files + else: + dft_files = vasprun_files + + # TODO extend to other DFT codes + + return dft_files + def get_mainfile_keys(self, **kwargs): """ - Generates extra `child_archives` to create the DFT+TB workflow if some conditions are met. + Generates extra `child_archives` to create the DFT+TB workflow if the conditions in `workflow_dft_files` are met. """ - filepath = kwargs.get('filename') - mainfile = os.path.basename(filepath) - wannier90_files = get_files('*.wout', filepath, mainfile, deep=False) - if len(wannier90_files) == 1: - return ['DMFT_workflow'] - return True + dft_files = self.workflow_dft_files(**kwargs) + if not dft_files: + return True + return ['DFTPlusTB_workflow'] def parse( self, filepath: str, archive: EntryArchive, logger: 'BoundLogger' @@ -512,23 +528,7 @@ def parse( # TODO extend to handle DFT+TB workflows using `self._dft_codes` # Checking if other mainfiles are present, if the closest is a DFT code, tries to create the # DFT+TB workflow and link it with the corresponding Wannier90 entry - vasprun_files = get_files( - pattern='*vasprun.xml', - filepath=self.mainfile, - stripname=self.basename, - deep=False, - ) - outcar_files = get_files( - pattern='*OUTCAR', - filepath=self.mainfile, - stripname=self.basename, - deep=False, - ) - dft_files = [] - if len(vasprun_files) == 0: - dft_files = outcar_files - elif len(outcar_files) == 0: - dft_files = vasprun_files + dft_files = self.workflow_dft_files(filename=self.mainfile) if len(dft_files) == 1: dft_path = dft_files[-1].split('raw/')[-1] filepath_stripped = self.filepath.split('raw/')[-1] @@ -556,8 +556,28 @@ def parse( ) if dft_path == mainfile: dft_archive = entry_archive + + # check if the simulation cell is the same + dft_cell = dft_archive.m_xpath( + 'data.model_system[-1].cell[0]' + ) + tb_cell = self.archive.m_xpath( + 'data.model_system[-1].cell[0]' + ) + if dft_cell is not None and tb_cell is not None: + if dft_cell != tb_cell: + logger.warning( + 'The DFT and TB cells do not coincide. We might be connecting wrongly the DFT and TB tasks.' + ) + else: + logger.warning( + 'Could not resolve the DFT and TB cells.' + ) + return + + # Parse the workflow information dft_plus_tb_archive = self._child_archives.get( - 'DFT_plus_TB_workflow' + 'DFTPlusTB_workflow' ) dft_plus_tb = parse_dft_plus_tb_workflow( dft_archive=dft_archive, tb_archive=self.archive diff --git a/src/nomad_parser_wannier90/parsers/utils/utils.py b/src/nomad_parser_wannier90/parsers/utils/utils.py index 82ca773..b4f9178 100644 --- a/src/nomad_parser_wannier90/parsers/utils/utils.py +++ b/src/nomad_parser_wannier90/parsers/utils/utils.py @@ -7,8 +7,7 @@ from glob import glob from nomad.datamodel.metainfo.workflow import TaskReference - -from nomad_parser_wannier90.schema_packages.package import DFTPlusTB +from nomad_simulations.schema_packages.workflow import DFTPlusTB def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = True): diff --git a/src/nomad_parser_wannier90/parsers/win_parser.py b/src/nomad_parser_wannier90/parsers/win_parser.py index 217da90..f7dd19d 100644 --- a/src/nomad_parser_wannier90/parsers/win_parser.py +++ b/src/nomad_parser_wannier90/parsers/win_parser.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import TYPE_CHECKING, Optional, Union if TYPE_CHECKING: diff --git a/src/nomad_parser_wannier90/schema_packages/package.py b/src/nomad_parser_wannier90/schema_packages/package.py index 2dff4f1..07a7b0d 100644 --- a/src/nomad_parser_wannier90/schema_packages/package.py +++ b/src/nomad_parser_wannier90/schema_packages/package.py @@ -1,19 +1,5 @@ -from typing import TYPE_CHECKING, Optional - -import numpy as np - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive - from structlog.stdlib import BoundLogger - from nomad.config import config -from nomad.datamodel.data import ArchiveSection -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import Quantity, Reference, SchemaPackage, SubSection -from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs -from nomad_simulations.schema_packages.properties import FermiLevel +from nomad.metainfo import SchemaPackage configuration = config.get_plugin_entry_point( 'nomad_parser_wannier90.schema_packages:nomad_parser_wannier90_schema' @@ -22,316 +8,4 @@ m_package = SchemaPackage() -class SimulationWorkflow(Workflow): - """ - A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The - normalize function checks the definition of these sections and sets the name of the workflow. - - A `SimulationWorkflow` will be composed of: - - a `method` section containing methodological parameters used specifically during the workflow, - - a list of `inputs` with references to the `ModelSystem` or `ModelMethod` input sections, - - a list of `outputs` with references to the `Outputs` section, - - a list of `tasks` containing references to the activity `Simulation` used in the workflow, - """ - - method = SubSection( - sub_section=BaseModelMethod.m_def, - description="""Methodological parameters used during the workflow.""", - ) - - def resolve_inputs_outputs( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ) -> None: - """ - Resolves the `inputs` and `outputs` sections from the archive sections under `data` and stores - them in private attributes. - - Args: - archive (EntryArchive): The archive to resolve the sections from. - logger (BoundLogger): The logger to log messages. - """ - if ( - not archive.data.model_system - or not archive.data.model_method - or not archive.data.outputs - ): - logger.info( - '`ModelSystem`, `ModelMethod` and `Outputs` required for normalization of `SimulationWorkflow`.' - ) - return None - self._input_systems = archive.data.model_system - self._input_methods = archive.data.model_method - self._outputs = archive.data.outputs - - # Resolve `inputs` - if not self.inputs: - self.m_add_sub_section( - Workflow.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - # Resolve `outputs` - if not self.outputs: - self.m_add_sub_section( - Workflow.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - # Resolve the `inputs` and `outputs` from the archive - self.resolve_inputs_outputs(archive=archive, logger=logger) - - # Storing the initial `ModelSystem` - for link in self.inputs: - if isinstance(link.section, ModelSystem): - self.initial_structure = link.section - break - - -class SinglePoint(SimulationWorkflow): - """ - A `SimulationWorkflow` used to represent a single point calculation workflow. The `SinglePoint` - workflow is the minimum workflow required to represent a simulation. The self-consistent steps of - scf simulation are represented in the `SinglePoint` workflow. - """ - - n_scf_steps = Quantity( - type=np.int32, - description=""" - The number of self-consistent field (SCF) steps in the simulation. - """, - ) - - def generate_task(self) -> Task: - """ - Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. - - Returns: - Task: The generated `Task` section. - """ - task = Task() - if self._input_systems is not None and len(self._input_systems) > 0: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - if self._input_methods is not None and len(self._input_methods) > 0: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model Method', section=self._input_methods[0]), - ) - if self._outputs is not None and len(self._outputs) > 0: - task.m_add_sub_section( - Task.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - return task - - def resolve_n_scf_steps(self) -> int: - """ - Resolves the number of self-consistent field (SCF) steps in the simulation. - - Returns: - int: The number of SCF steps. - """ - for output in self.outputs: - if not isinstance(output, SCFOutputs): - continue - if output.scf_steps is not None: - return len(output.scf_steps) - return 1 - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - if self.tasks is not None and len(self.tasks) > 1: - logger.error('A `SinglePoint` workflow must have only one task.') - return - - # Generate the `tasks` section if this does not exist - if not self.tasks: - task = self.generate_task() - self.tasks.append(task) - - # Resolve `n_scf_steps` - self.n_scf_steps = self.resolve_n_scf_steps() - - -class BeyondDFTMethod(ArchiveSection): - """ - An abstract section used to store references to the `ModelMethod` sections of each of the - archives defining the `tasks` and used to build the standard workflow. This section needs to be - inherit and the method references need to be defined for each specific case. - """ - - def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: - """ - Resolves the `ModelMethod` reference for the `task`. - - Args: - task (Task): The task to resolve the `ModelMethod` reference from. - - Returns: - Optional[BaseModelMethod]: The resolved `ModelMethod` reference. - """ - for input in task.inputs: - if input.name == 'Input Model Method': - return input.section - return None - - -class BeyondDFTWorkflow(SimulationWorkflow): - method = SubSection(sub_section=BeyondDFTMethod.m_def) - - def resolve_all_outputs(self) -> list[Outputs]: - """ - Resolves all the `Outputs` sections from the `tasks` in the workflow. This is useful when - the workflow is composed of multiple tasks and the outputs need to be stored in a list - for further manipulation, e.g., to plot multiple band structures in a DFT+TB workflow. - - Returns: - list[Outputs]: A list of all the `Outputs` sections from the `tasks`. - """ - all_outputs = [] - for task in self.tasks: - all_outputs.append(task.outputs[-1]) - return all_outputs - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - -class DFTPlusTBMethod(BeyondDFTMethod): - """ - Section used to reference the `DFT` and `TB` `ModelMethod` sections in each of the archives - conforming a DFT+TB simulation workflow. - """ - - dft_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the DFT `ModelMethod` section in the DFT task.""", - ) - tb_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the GW `ModelMethod` section in the TB task.""", - ) - - -class DFTPlusTB(BeyondDFTWorkflow): - """ - DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This - workflow section is used to define the same energy reference for both the DFT and TB calculations, by - setting it up to the DFT calculation. The structure of the workflow is: - - - `self.inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `self.outputs[0]`: the outputs section in the TB entry, - - `tasks[0]`: - - `tasks[0].task` (TaskReference): the reference to the `SinglePoint` task in the DFT entry, - - `tasks[0].inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `tasks[0].outputs[0]`: the outputs section in the DFT entry, - - `tasks[1]`: - - `tasks[1].task` (TaskReference): the reference to the `SinglePoint` task in the TB entry, - - `tasks[1].inputs[0]`: the outputs section in the DFT entry, - - `tasks[1].outputs[0]`: the outputs section in the TB entry, - - `method`: references to the `ModelMethod` sections in the DFT and TB entries. - """ - - def resolve_method(self) -> DFTPlusTBMethod: - """ - Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the - `resolve_beyonddft_method_ref` method from the `BeyondDFTMethod` section. - - Returns: - DFTPlusTBMethod: The resolved `DFTPlusTBMethod` section. - """ - method = DFTPlusTBMethod() - - # DFT method reference - dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) - if dft_method is not None: - method.dft_method_ref = dft_method - - # TB method reference - tb_method = method.resolve_beyonddft_method_ref(task=self.tasks[1].task) - if tb_method is not None: - method.tb_method_ref = tb_method - - return method - - def link_tasks(self) -> None: - """ - Links the `outputs` of the DFT task with the `inputs` of the TB task. - """ - dft_task = self.tasks[0] - dft_task.inputs = [ - Link( - name='Input Model System', - section=self.inputs[0], - ) - ] - dft_task.outputs = [ - Link( - name='Output DFT Data', - section=dft_task.outputs[-1], - ) - ] - - tb_task = self.tasks[1] - tb_task.inputs = [ - Link( - name='Output DFT Data', - section=dft_task.outputs[-1], - ), - ] - tb_task.outputs = [ - Link( - name='Output TB Data', - section=tb_task.outputs[-1], - ) - ] - - def overwrite_fermi_level(self) -> None: - """ - Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. - """ - dft_output = self.tasks[0].outputs[-1] - if not dft_output.fermi_levels: - return None - fermi_level = dft_output.fermi_levels[-1] - - tb_output = self.tasks[1].outputs[-1] - tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - # Initial check for the number of tasks - if len(self.tasks) != 2: - logger.error('A `DFTPlusTB` workflow must have two tasks.') - return - - # Check if tasks are `SinglePoint` - for task in self.tasks: - if task.m_def.name != 'SinglePoint': - logger.error( - 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks.' - ) - return - - # Define names of the workflow and `tasks` - self.name = 'DFT+TB' - self.tasks[0].name = 'DFT SinglePoint' - self.tasks[1].name = 'TB SinglePoint' - - # Resolve method refs for each task and store under `method` - self.method = self.resolve_method() - - # Link the tasks - self.link_tasks() - - # Overwrite the Fermi level in the TB calculation - self.overwrite_fermi_level() - - m_package.__init_metainfo__() diff --git a/tests/__init__.py b/tests/__init__.py index 52e83b1..5cdfd19 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from nomad import utils logger = utils.get_logger(__name__) diff --git a/tests/test_parser.py b/tests/test_parser.py index d8bb035..1143327 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,27 +1,10 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - import os from typing import Optional import numpy as np import pytest from nomad.datamodel import EntryArchive + from nomad_parser_wannier90.parsers.parser import Wannier90Parser from . import logger