Skip to content

Commit

Permalink
Adding workflow parsing from VASP+Wannier90
Browse files Browse the repository at this point in the history
  • Loading branch information
JosePizarro3 committed Sep 17, 2024
1 parent 8f1b05e commit 96beebe
Show file tree
Hide file tree
Showing 8 changed files with 1,020,309 additions and 27 deletions.
75 changes: 72 additions & 3 deletions src/nomad_parser_wannier90/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,14 @@
KMesh as ModelKMesh,
)
from nomad_simulations.schema_packages.outputs import Outputs
from simulationworkflowschema import SinglePoint

# from nomad_simulations.schema_packages.utils import check_simulation_cell
from nomad_parser_wannier90.parsers.band_parser import Wannier90BandParser
from nomad_parser_wannier90.parsers.dos_parser import Wannier90DosParser
from nomad_parser_wannier90.parsers.hr_parser import Wannier90HrParser
from nomad_parser_wannier90.parsers.utils import get_files
from nomad_parser_wannier90.parsers.utils import get_files, parse_dft_plus_tb_workflow
from nomad_parser_wannier90.parsers.win_parser import Wannier90WInParser
from nomad_parser_wannier90.schema_packages.package import SinglePoint

re_n = r'[\n\r]'

Expand Down Expand Up @@ -448,6 +449,17 @@ def parse_outputs(self, simulation: Simulation, logger: 'BoundLogger') -> Output

return outputs

def get_mainfile_keys(self, **kwargs):
    """
    Declares the extra child archives that should be created for this mainfile.

    When exactly one `*.wout` file is found next to the mainfile, a child archive keyed
    `DFT_plus_TB_workflow` is requested. `parse` looks up that same key in
    `self._child_archives` to store the DFT+TB workflow, so the two strings must match.

    Args:
        **kwargs: Matching context; only `filename` (the path of the mainfile) is read.

    Returns:
        `['DFT_plus_TB_workflow']` when a single Wannier90 `*.wout` file is found,
        otherwise `True`.
    """
    filepath = kwargs.get('filename')
    mainfile = os.path.basename(filepath)
    wannier90_files = get_files('*.wout', filepath, mainfile, deep=False)
    if len(wannier90_files) == 1:
        # This key was previously 'DMFT_workflow', which `parse` never reads: it fetches
        # `self._child_archives.get('DFT_plus_TB_workflow')`, so the workflow was lost.
        return ['DFT_plus_TB_workflow']
    return True

def parse(
self, filepath: str, archive: EntryArchive, logger: 'BoundLogger'
) -> None:
Expand Down Expand Up @@ -494,6 +506,63 @@ def parse(
simulation.outputs.append(outputs)

# Workflow section
# TODO extend to handle DFT+TB workflows using `self._dft_codes`
workflow = SinglePoint()
self.archive.workflow2 = workflow

# TODO extend to handle DFT+TB workflows using `self._dft_codes`
# Check whether other mainfiles are present; if the closest one comes from a DFT code, try to
# create the DFT+TB workflow and link it with the corresponding Wannier90 entry.
vasprun_files = get_files(
pattern='*vasprun.xml',
filepath=self.mainfile,
stripname=self.basename,
deep=False,
)
outcar_files = get_files(
pattern='*OUTCAR',
filepath=self.mainfile,
stripname=self.basename,
deep=False,
)
dft_files = []
if len(vasprun_files) == 0:
dft_files = outcar_files
elif len(outcar_files) == 0:
dft_files = vasprun_files
if len(dft_files) == 1:
dft_path = dft_files[-1].split('raw/')[-1]
filepath_stripped = self.filepath.split('raw/')[-1]
try:
# For automatic workflows
from nomad.app.v1.models import MetadataRequired
from nomad.search import search

upload_id = self.archive.metadata.upload_id
search_ids = search(
owner='visible',
user_id=self.archive.metadata.main_author.user_id,
query={'upload_id': upload_id},
required=MetadataRequired(include=['entry_id', 'mainfile']),
).data
metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids]
if len(metadata) > 1:
for entry_id, mainfile in metadata:
if (
mainfile == filepath_stripped
): # we skipped the current parsed mainfile
continue
entry_archive = self.archive.m_context.load_archive(
entry_id, upload_id, None
)
if dft_path == mainfile:
dft_archive = entry_archive
dft_plus_tb_archive = self._child_archives.get(
'DFT_plus_TB_workflow'
)
dft_plus_tb = parse_dft_plus_tb_workflow(
dft_archive=dft_archive, tb_archive=self.archive
)
dft_plus_tb_archive.workflow2 = dft_plus_tb
break
except Exception:
logger.warning('Could not resolve the DFT+TB workflow for Wannier90.')
2 changes: 1 addition & 1 deletion src/nomad_parser_wannier90/parsers/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .utils import get_files
from .utils import get_files, parse_dft_plus_tb_workflow
57 changes: 39 additions & 18 deletions src/nomad_parser_wannier90/parsers/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD.
# See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from nomad.datamodel.datamodel import EntryArchive

import os
from glob import glob

from nomad.datamodel.metainfo.workflow import TaskReference

from nomad_parser_wannier90.schema_packages.package import DFTPlusTB


def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = True):
"""Get files following the `pattern` with respect to the file `stripname` (usually this
Expand Down Expand Up @@ -49,3 +39,34 @@ def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = Tru

filenames = [f for f in filenames if os.access(f, os.F_OK)]
return filenames


def parse_dft_plus_tb_workflow(
    dft_archive: 'EntryArchive', tb_archive: 'EntryArchive'
) -> 'DFTPlusTB | None':
    """
    Parses the DFT+TB workflow by using the DFT and TB archives.

    Args:
        dft_archive (EntryArchive): The DFT archive.
        tb_archive (EntryArchive): The TB archive.

    Returns:
        DFTPlusTB | None: The parsed DFT+TB workflow section, or `None` when either
        archive has no `workflow2` section to reference.
    """
    # Bail out before building anything: without both workflows there is nothing to link.
    if not dft_archive.workflow2 or not tb_archive.workflow2:
        return None

    dft_task = dft_archive.workflow2
    tb_task = tb_archive.workflow2

    dft_plus_tb = DFTPlusTB()
    # The combined workflow takes the first input of the DFT task and the last output
    # of the TB task.
    dft_plus_tb.inputs = dft_task.inputs[0]
    dft_plus_tb.outputs = tb_task.outputs[-1]
    # Task order is DFT first, then TB.
    dft_plus_tb.tasks = [
        TaskReference(task=dft_task),
        TaskReference(task=tb_task),
    ]

    return dft_plus_tb
Loading

0 comments on commit 96beebe

Please sign in to comment.