From e60167c3dbc4b41e996992a78bb29e2a6f674deb Mon Sep 17 00:00:00 2001 From: Alvin Noe Ladines Date: Fri, 25 Oct 2024 18:06:31 +0200 Subject: [PATCH 1/2] Initial implementation of mapping parser --- pyproject.toml | 4 +- .../parsers/mapping_parser.py | 173 ++++++++++++++++++ .../parsers/utils/utils.py | 2 +- .../schema_packages/package.py | 98 ++++++++++ 4 files changed, 274 insertions(+), 3 deletions(-) create mode 100644 src/nomad_parser_wannier90/parsers/mapping_parser.py diff --git a/pyproject.toml b/pyproject.toml index c1eb034..aa3fb81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,8 @@ maintainers = [ ] license = { file = "LICENSE" } dependencies = [ - "nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@6b7149a71b2999abbb2225fcb67a5acafc811806", - "nomad-simulations@git+https://github.com/nomad-coe/nomad-simulations.git@f337b734126bc9cfe824f40993cda46e35f7f1eb", + #"nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@ea0e5cbc632a514ea4d7e0bca1fab105d452f94c", + #"nomad-simulations@git+https://github.com/nomad-coe/nomad-simulations.git@6a7668bcda0c074f8696eddccf326973acff41ac", ] [project.urls] diff --git a/src/nomad_parser_wannier90/parsers/mapping_parser.py b/src/nomad_parser_wannier90/parsers/mapping_parser.py new file mode 100644 index 0000000..83b6cf6 --- /dev/null +++ b/src/nomad_parser_wannier90/parsers/mapping_parser.py @@ -0,0 +1,173 @@ +import os +import re +from typing import TYPE_CHECKING, Optional, List, Dict, Any + +import numpy as np +from nomad.config import config + +if TYPE_CHECKING: + from nomad.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from .parser import WOutParser +from .win_parser import WInParser +from nomad.parsing.file_parser.mapping_parser import TextParser, MetainfoParser +from nomad.parsing.file_parser.text_parser import DataTextParser +from nomad_parser_wannier90.schema_packages.package import Simulation, IN_ANNOTATION_KEY, OUT_ANNOTATION_KEY, BAND_ANNOTATION_KEY +from nomad_simulations.schema_packages.workflow import SinglePoint +from .utils import get_files, parse_dft_plus_tb_workflow + +re_n = r'[\n\r]' + +configuration = config.get_plugin_entry_point( + 'nomad_parser_wannier90.parsers:parser_entry_point' +) + + +class WBandTextParser(TextParser): + + def get_data(self, data: np.ndarray) -> np.ndarray: + return np.transpose(data)[1:].transpose() + + + +class WOutTextParser(TextParser): + + def get_lattice_vectors(self, vectors: List[Any]) -> np.ndarray: + return np.vstack(vectors[-3:]) + + def is_maximally_localized(self, niter: int, default=0) -> bool: + return (niter or default) > 1 + + + +class WInTextParser(TextParser): + + def get_projections(self, source: List[Any]) -> List[Dict[str, Any]]: + return [dict(atom=val) for val in source] + + def get_branch_label_indices(self, atom: List[Any], positions: List[np.ndarray], labels: List[str], lattice_vectors: List[np.ndarray]) -> Any: + + symbols, indices = [], [] + if not atom: + return None + + elif isinstance(atom[0], int): + indices = atom + + elif match := re.match(r'([cf])=(.+?),(.+?),(.+)', atom[0]): + coord = match.groups()[0] + position = np.array(match.groups()[1:4], float) + if coord.lower() == 'f': + position = np.dot(position, lattice_vectors) + for n, pos in enumerate(positions): + if np.allclose(position, pos, configuration.equal_cell_positions_tolerance): + indices.append(n) + symbols.append(labels[n]) + + return dict(label=''.join(symbols), indices=indices) + + +class Wannier90Parser: + def get_dft_files(self, mainfile:str) -> List[str]: + for filename in ['vasprun.xml', 'OUTCAR']: + files = get_files( + pattern=f'*{filename}', + filepath=mainfile, + stripname=os.path.basename(mainfile), + deep=False + ) + if files: + return files + return [] + + def get_mainfile_keys(self, **kwargs): + """ + Generates extra `child_archives` to create the DFT+TB workflow if the conditions in `workflow_dft_files` are met. + """ + dft_files = self.get_dft_files(kwargs.get('filename', '')) + return ['DFTPlusTB_workflow'] if dft_files else True + + def parse(self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger', child_archives: Dict[str, 'EntryArchive'] = {}) -> None: + # define mapping parser interface to OutParser + wout_parser = WOutTextParser(text_parser=WOutParser()) + wout_parser.filepath = mainfile + + # construct metainfo parser + data = Simulation() + data_parser = MetainfoParser() + data_parser.annotation_key = OUT_ANNOTATION_KEY + data_parser.data_object = data + + wout_parser.convert(data_parser) + archive.data = data + + # parse input file + win_parser = WInTextParser(text_parser=WInParser()) + if data.model_system: + win_files = get_files( + pattern='*.win', filepath=mainfile, stripname=os.path.basename(mainfile) + ) + if len(win_files) > 1: + logger.warning( + 'Multiple `*.win` files found. We will parse the first one.' + ) + if win_files is not None: + win_parser.filepath = win_files[0] + # need data from out + for key in ['structure', 'lattice_vectors']: + win_parser.data[key] = wout_parser.data.get(key) + data_parser.annotation_key = IN_ANNOTATION_KEY + data_parser.data_object = data + win_parser.convert(data_parser) + + wband_parser = WBandTextParser(text_parser=DataTextParser()) + # parse band file + band_files = get_files( + pattern='*band.dat', filepath=mainfile, stripname=os.path.basename(mainfile) + ) + for band_file in band_files: + wband_parser.filepath = band_file + wband_parser.data_object.parse('data') + data_parser.annotation_key = BAND_ANNOTATION_KEY + data_parser.data_object = data + wband_parser.convert(data_parser) + + workflow = SinglePoint() + workflow.normalize(archive=archive, logger=logger) + archive.workflow2 = workflow + + # workflow + if child_archives: + from nomad.app.v1.routers.uploads import get_upload_with_read_access + from nomad.datamodel import User + + upload_id = archive.metadata.upload_id + upload = get_upload_with_read_access( + upload_id=upload_id, + user=User.get(user_id=archive.metadata.main_author.user_id), + ) + dft_archive = None + dft_files = self.get_dft_files(mainfile) + dft_path = dft_files[-1].split('raw/')[-1] + with upload.entries_metadata() as entries_metadata: + for metadata in entries_metadata: + if metadata.mainfile == dft_path: + dft_archive = upload.get_entry(metadata.entry_id)._parser_results + break + dft_plus_tb_archive = child_archives.get( + 'DFTPlusTB_workflow' + ) + dft_plus_tb = parse_dft_plus_tb_workflow( + dft_archive=dft_archive, tb_archive=archive + ) + dft_plus_tb_archive.workflow2 = dft_plus_tb + + # debug + self.wout_parser = wout_parser + self.data_parser = data_parser + self.win_parser = win_parser + self.wband_parser = wband_parser + # close parser contexts + # wout_parser.close() + # data_parser.close() diff --git a/src/nomad_parser_wannier90/parsers/utils/utils.py b/src/nomad_parser_wannier90/parsers/utils/utils.py index edc476e..b4c498b 100644 --- a/src/nomad_parser_wannier90/parsers/utils/utils.py +++ b/src/nomad_parser_wannier90/parsers/utils/utils.py @@ -6,7 +6,7 @@ import os from glob import glob -from nomad.datamodel.metainfo.workflow_new import TaskReference2 as TaskReference +# from nomad.datamodel.metainfo.workflow_new import TaskReference2 as TaskReference from nomad_simulations.schema_packages.workflow import DFTPlusTB diff --git a/src/nomad_parser_wannier90/schema_packages/package.py b/src/nomad_parser_wannier90/schema_packages/package.py index 89331a1..cb3d303 100644 --- a/src/nomad_parser_wannier90/schema_packages/package.py +++ b/src/nomad_parser_wannier90/schema_packages/package.py @@ -1,6 +1,10 @@ from nomad.config import config from nomad.metainfo import SchemaPackage +from nomad_simulations.schema_packages import general, model_system, model_method, numerical_settings, outputs, properties +from nomad.parsing.file_parser.mapping_parser import MappingAnnotationModel + + configuration = config.get_plugin_entry_point( 'nomad_parser_wannier90.schema_packages:schema_package_entry_point' ) @@ -8,4 +12,98 @@ m_package = SchemaPackage() +OUT_ANNOTATION_KEY = 'out' +IN_ANNOTATION_KEY = 'in' +BAND_ANNOTATION_KEY = 'band' + + +class Program(general.Program): + + general.Program.version.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.version') + +class AtomsState(model_system.AtomsState): + + model_system.AtomsState.chemical_symbol.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + +class AtomicCell(model_system.AtomicCell): + + model_system.AtomicCell.atoms_state.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.labels') + + model_system.AtomicCell.positions.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.positions', unit='angstrom') + + model_system.AtomicCell.lattice_vectors.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_lattice_vectors', ['lattice_vectors']), unit='angstrom') + + +class ModelSystem(model_system.ModelSystem): + + model_system.AtomicCell.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + model_system.ModelSystem.model_system.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_projections', ['.projections'])) + + model_system.ModelSystem.branch_label.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.atom', 'structure.positions', 'structure.labels','lattice_vectors']), search='label', cache=True) + + model_system.ModelSystem.atom_indices.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.atom', 'structure.positions', 'structure.labels','lattice_vectors']), search='indices') + + +class KMesh(numerical_settings.KMesh): + + numerical_settings.KMesh.n_points.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.n_points') + + +class KSpace(numerical_settings.KSpace): + + numerical_settings.KSpace.k_mesh.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.k_mesh') + + +class ModelMethod(model_method.ModelMethod): + + numerical_settings.KSpace.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + +class Wannier(model_method.Wannier): + + model_method.Wannier.is_maximally_localized.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('is_maximally_localized', ['.Niter'], dict(default=0))) + + model_method.Wannier.energy_window_outer.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.energy_windows.outer') + + model_method.Wannier.n_orbitals.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.Wannier') + + +class ElectronicBandStructure(properties.ElectronicBandStructure): + + properties.ElectronicBandStructure.n_bands.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.Nwannier') + + properties.ElectronicBandStructure.value.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_data', ['.data'])) + + +class Outputs(outputs.Outputs): + + outputs.Outputs.electronic_band_structures.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + outputs.Outputs.electronic_band_structures.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + +class Simulation(general.Simulation): + + general.Simulation.program.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + general.Simulation.model_system.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.structure') + + general.Simulation.model_system.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + model_method.Wannier.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + general.Simulation.outputs.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + general.Simulation.outputs.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + +Simulation.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') + +Simulation.m_def.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') + +Simulation.m_def.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') + + m_package.__init_metainfo__() From 4ce4c96099ebb7ae9dcf037ad8e383f2486de36e Mon Sep 17 00:00:00 2001 From: Alvin Noe Ladines Date: Thu, 31 Oct 2024 15:51:39 +0100 Subject: [PATCH 2/2] Extend --- .../parsers/mapping_parser.py | 256 +++++++++++++++--- .../schema_packages/package.py | 103 ++++++- 2 files changed, 318 insertions(+), 41 deletions(-) diff --git a/src/nomad_parser_wannier90/parsers/mapping_parser.py b/src/nomad_parser_wannier90/parsers/mapping_parser.py index 83b6cf6..5359262 100644 --- a/src/nomad_parser_wannier90/parsers/mapping_parser.py +++ b/src/nomad_parser_wannier90/parsers/mapping_parser.py @@ -1,6 +1,6 @@ import os import re -from typing import TYPE_CHECKING, Optional, List, Dict, Any +from typing import TYPE_CHECKING, Any, Dict, List, Optional import numpy as np from nomad.config import config @@ -9,13 +9,23 @@ from nomad.datamodel import EntryArchive from structlog.stdlib import BoundLogger -from .parser import WOutParser -from .win_parser import WInParser -from nomad.parsing.file_parser.mapping_parser import TextParser, MetainfoParser +from nomad.parsing.file_parser.mapping_parser import MetainfoParser, TextParser from nomad.parsing.file_parser.text_parser import DataTextParser -from nomad_parser_wannier90.schema_packages.package import Simulation, IN_ANNOTATION_KEY, OUT_ANNOTATION_KEY, BAND_ANNOTATION_KEY from nomad_simulations.schema_packages.workflow import SinglePoint + +from nomad_parser_wannier90.schema_packages.package import ( + BAND_ANNOTATION_KEY, + DOS_ANNOTATION_KEY, + HR_ANNOTATION_KEY, + IN_ANNOTATION_KEY, + OUT_ANNOTATION_KEY, + Simulation, +) + +from .hr_parser import HrParser +from .parser import WOutParser from .utils import get_files, parse_dft_plus_tb_workflow +from .win_parser import WInParser re_n = r'[\n\r]' @@ -24,61 +34,199 @@ ) -class WBandTextParser(TextParser): +class WHrTextParser(TextParser): + def get_hoppings(self, source: Dict[str, Any], **kwargs) -> Dict[str, Any]: + degeneracy_factors = source.get('degeneracy_factors')[2:] + full_hoppings = source.get('hoppings', []) + n_wigner_seitz_points = source.get('degeneracy_factors')[1] + n_orbitals = source.get('n_orbitals') + + hops = np.reshape( + full_hoppings, + (n_wigner_seitz_points, n_orbitals, n_orbitals, 7), + ) + + # storing the crystal field splitting values + ws0 = int((n_wigner_seitz_points - 1) / 2) + crystal_fields = [ + hops[ws0, i, i, 5] for i in range(n_orbitals) + ] # only real elements + + # delete repeated points for different orbitals + ws_points = hops[:, :, :, :3] + ws_points = np.unique(ws_points.reshape(-1, 3), axis=0) + + # passing hoppings + hoppings = hops[:, :, :, -2] + 1j * hops[:, :, :, -1] + result = dict( + degeneracy_factors=degeneracy_factors, + hoppings=hoppings, + crystal_fields=crystal_fields, + ) + if kwargs.get('ws'): + result.update(dict(ws_points=ws_points, n_ws_points=n_wigner_seitz_points)) + + return result + + +class WDosTextParser(TextParser): + def get_dos(self, source: np.ndarray) -> Dict[str, Any]: + data = np.transpose(source) + return dict(energies=data[0], value=data[1]) + +class WBandTextParser(TextParser): def get_data(self, data: np.ndarray) -> np.ndarray: return np.transpose(data)[1:].transpose() - class WOutTextParser(TextParser): - def get_lattice_vectors(self, vectors: List[Any]) -> np.ndarray: return np.vstack(vectors[-3:]) + def get_pbc(self, vectors: List[Any]) -> List[bool]: + return [vectors is not None] * 3 + def is_maximally_localized(self, niter: int, default=0) -> bool: return (niter or default) > 1 + def get_kpoints(self, points: np.ndarray) -> np.ndarray: + return np.complex128(points[::2]) + + def get_k_line_path(self, k_line_path: Dict[str, Any]): + high_symm_names = k_line_path.get('high_symm_name') + high_symm_values = [ + np.reshape(val, (2, 3)) for val in k_line_path.get('high_symm_value') + ] + # Start with the first element of the first pair + names = [high_symm_names[0][0]] + values = [high_symm_values[0][0]] + for i, pair in enumerate(high_symm_names): + # Add the second element if it's not the last one in the list + if pair[1] != names[-1]: + names.append(pair[1]) + values.append(high_symm_values[i][1]) + return dict(names=names, values=values) class WInTextParser(TextParser): + # TODO these should be defined in common utils + _l_symbols = ['s', 'p', 'd', 'f'] + _m_symbols = [ + None, + 'x', + 'y', + 'z', + 'z^2', + 'xz', + 'yz', + 'x^2-y^2', + 'xy', + 'z^3', + 'xz^2', + 'yz^2', + 'z(x^2-y^2)', + 'xyz', + 'x(x^2-3y^2)', + 'y(3x^2-y^2)', + ] + _wannier_symbols = [ + 's', + 'px', + 'py', + 'pz', + 'dz2', + 'dxz', + 'dyz', + 'dx2-y2', + 'dxy', + 'fz3', + 'fxz2', + 'fyz2', + 'fz(x2-y2)', + 'fxyz', + 'fx(x2-3y2)', + 'fy(3x2-y2)', + ] def get_projections(self, source: List[Any]) -> List[Dict[str, Any]]: - return [dict(atom=val) for val in source] - - def get_branch_label_indices(self, atom: List[Any], positions: List[np.ndarray], labels: List[str], lattice_vectors: List[np.ndarray]) -> Any: - + return [dict(projection=val) for val in source] + + def get_branch_label_indices( + self, + atom: Any, + positions: List[np.ndarray], + labels: List[str], + lattice_vectors: List[np.ndarray], + ) -> Any: symbols, indices = [], [] - if not atom: + if atom is None: return None - elif isinstance(atom[0], int): - indices = atom + elif isinstance(atom, int): + indices = [atom] - elif match := re.match(r'([cf])=(.+?),(.+?),(.+)', atom[0]): + elif match := re.match(r'([cf])=(.+?),(.+?),(.+)', atom): coord = match.groups()[0] position = np.array(match.groups()[1:4], float) if coord.lower() == 'f': position = np.dot(position, lattice_vectors) for n, pos in enumerate(positions): - if np.allclose(position, pos, configuration.equal_cell_positions_tolerance): + if np.allclose( + position, pos, configuration.equal_cell_positions_tolerance + ): indices.append(n) symbols.append(labels[n]) + elif isinstance(atom, str): + indices = [n for n, label in enumerate(labels) if label == atom] + symbols = [atom] + return dict(label=''.join(symbols), indices=indices) + def get_orbitals_state(self, orbital: Any) -> List[Dict[str, Any]]: + if orbital is None: + return None + + states = [] + orbitals = re.findall(r'l=([\d+])(?:,mr=([\d])+=)?', orbital) + for orb in orbitals: + nl = int(orb[0]) + states.append(dict(l=self._l_symbols[nl])) + if orb[1]: + nm = sum([len(range(-n, n + 1)) for n in range(nl)]) + int(orb[1]) + states[-1]['m'] = self._m_symbols[nm] + if not orbitals: + for orb in orbital.split(';'): + try: + norb = self._wannier_symbols.index(orb) + except Exception: + continue + # calculate l,m from norb + nl = 0 + nm = 0 + while True: + m_offset = [nm + nq for nq in range(len(range(-nl, nl + 1)))] + if norb in m_offset: + nm = m_offset.index(norb) + break + nl += 1 + nm += len(m_offset) + states.append(dict(l=self._l_symbols[nl], m=self._m_symbols[nm])) + return states + class Wannier90Parser: - def get_dft_files(self, mainfile:str) -> List[str]: + def get_dft_files(self, mainfile: str) -> List[str]: for filename in ['vasprun.xml', 'OUTCAR']: files = get_files( pattern=f'*{filename}', filepath=mainfile, stripname=os.path.basename(mainfile), - deep=False + deep=False, ) if files: - return files + return files return [] def get_mainfile_keys(self, **kwargs): @@ -88,7 +236,14 @@ def get_mainfile_keys(self, **kwargs): dft_files = self.get_dft_files(kwargs.get('filename', '')) return ['DFTPlusTB_workflow'] if dft_files else True - def parse(self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger', child_archives: Dict[str, 'EntryArchive'] = {}) -> None: + def parse( + self, + mainfile: str, + archive: 'EntryArchive', + logger: 'BoundLogger', + child_archives: Dict[str, 'EntryArchive'] = {}, + ) -> None: + basename = os.path.basename(mainfile) # define mapping parser interface to OutParser wout_parser = WOutTextParser(text_parser=WOutParser()) wout_parser.filepath = mainfile @@ -106,13 +261,13 @@ def parse(self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger', c win_parser = WInTextParser(text_parser=WInParser()) if data.model_system: win_files = get_files( - pattern='*.win', filepath=mainfile, stripname=os.path.basename(mainfile) + pattern='*.win', filepath=mainfile, stripname=basename ) if len(win_files) > 1: logger.warning( 'Multiple `*.win` files found. We will parse the first one.' ) - if win_files is not None: + if win_files: win_parser.filepath = win_files[0] # need data from out for key in ['structure', 'lattice_vectors']: @@ -121,10 +276,35 @@ def parse(self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger', c data_parser.data_object = data win_parser.convert(data_parser) + # parse hr files + whr_parser = WHrTextParser(text_parser=HrParser()) + hr_files = get_files(pattern='*hr.dat', filepath=mainfile, stripname=basename) + if len(hr_files) > 1: + logger.info('Multiple `*hr.dat` files found.') + for hr_file in hr_files: + whr_parser.filepath = hr_file + # need data from out + whr_parser.data['n_orbitals'] = wout_parser.data.get('Nwannier') + data_parser.annotation_key = HR_ANNOTATION_KEY + data_parser.data_object = data + whr_parser.convert(data_parser) + + # parse dos files + wdos_parser = WDosTextParser(text_parser=DataTextParser()) + dos_files = get_files(pattern='*dos.dat', filepath=mainfile, stripname=basename) + if len(dos_files) > 1: + logger.info('Multiple `*dos.dat` files found.') + for dos_file in dos_files: + wdos_parser.filepath = dos_file + wdos_parser.data_object.parse('data') + data_parser.annotation_key = DOS_ANNOTATION_KEY + data_parser.data_object = data + wdos_parser.convert(data_parser) + wband_parser = WBandTextParser(text_parser=DataTextParser()) - # parse band file + # parse band files band_files = get_files( - pattern='*band.dat', filepath=mainfile, stripname=os.path.basename(mainfile) + pattern='*band.dat', filepath=mainfile, stripname=basename ) for band_file in band_files: wband_parser.filepath = band_file @@ -153,21 +333,27 @@ def parse(self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger', c with upload.entries_metadata() as entries_metadata: for metadata in entries_metadata: if metadata.mainfile == dft_path: - dft_archive = upload.get_entry(metadata.entry_id)._parser_results + dft_archive = upload.get_entry( + metadata.entry_id + )._parser_results break - dft_plus_tb_archive = child_archives.get( - 'DFTPlusTB_workflow' - ) + dft_plus_tb_archive = child_archives.get('DFTPlusTB_workflow') dft_plus_tb = parse_dft_plus_tb_workflow( dft_archive=dft_archive, tb_archive=archive ) dft_plus_tb_archive.workflow2 = dft_plus_tb # debug - self.wout_parser = wout_parser - self.data_parser = data_parser - self.win_parser = win_parser - self.wband_parser = wband_parser + # self.wout_parser = wout_parser + # self.data_parser = data_parser + # self.win_parser = win_parser + # self.wdos_parser = wdos_parser + # self.wband_parser = wband_parser + # self.whr_parser = whr_parser # close parser contexts - # wout_parser.close() - # data_parser.close() + wout_parser.close() + data_parser.close() + win_parser.close() + wdos_parser.close() + wband_parser.close() + whr_parser.close() diff --git a/src/nomad_parser_wannier90/schema_packages/package.py b/src/nomad_parser_wannier90/schema_packages/package.py index cb3d303..a9baa67 100644 --- a/src/nomad_parser_wannier90/schema_packages/package.py +++ b/src/nomad_parser_wannier90/schema_packages/package.py @@ -1,9 +1,16 @@ from nomad.config import config from nomad.metainfo import SchemaPackage - -from nomad_simulations.schema_packages import general, model_system, model_method, numerical_settings, outputs, properties from nomad.parsing.file_parser.mapping_parser import MappingAnnotationModel - +from nomad_simulations.schema_packages import ( + atoms_state, + general, + model_method, + model_system, + numerical_settings, + outputs, + properties, + variables, +) configuration = config.get_plugin_entry_point( 'nomad_parser_wannier90.schema_packages:schema_package_entry_point' @@ -15,46 +22,76 @@ OUT_ANNOTATION_KEY = 'out' IN_ANNOTATION_KEY = 'in' BAND_ANNOTATION_KEY = 'band' +HR_ANNOTATION_KEY = 'hr' +DOS_ANNOTATION_KEY = 'dos' class Program(general.Program): general.Program.version.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.version') + +class OrbitalsState(atoms_state.OrbitalsState): + + atoms_state.OrbitalsState.l_quantum_symbol.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.l') + + atoms_state.OrbitalsState.ml_quantum_symbol.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.m') + + class AtomsState(model_system.AtomsState): model_system.AtomsState.chemical_symbol.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + model_system.AtomsState.orbitals_state.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_orbitals_state', ['.projection[1]'])) + class AtomicCell(model_system.AtomicCell): model_system.AtomicCell.atoms_state.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.labels') + model_system.AtomicCell.atoms_state.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + model_system.AtomicCell.positions.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.positions', unit='angstrom') model_system.AtomicCell.lattice_vectors.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_lattice_vectors', ['lattice_vectors']), unit='angstrom') + model_system.AtomicCell.periodic_boundary_conditions.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_pbc', ['lattice_vectors'])) + class ModelSystem(model_system.ModelSystem): model_system.AtomicCell.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + model_system.ModelSystem.cell.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + model_system.ModelSystem.model_system.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_projections', ['.projections'])) - model_system.ModelSystem.branch_label.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.atom', 'structure.positions', 'structure.labels','lattice_vectors']), search='label', cache=True) + model_system.ModelSystem.branch_label.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.projection[0]', 'structure.positions', 'structure.labels','lattice_vectors']), search='label', cache=True) - model_system.ModelSystem.atom_indices.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.atom', 'structure.positions', 'structure.labels','lattice_vectors']), search='indices') + model_system.ModelSystem.atom_indices.m_annotations[IN_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_branch_label_indices', ['.projection[0]', 'structure.positions', 'structure.labels','lattice_vectors']), search='indices', cache=True) class KMesh(numerical_settings.KMesh): numerical_settings.KMesh.n_points.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.n_points') + numerical_settings.KMesh.grid.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.grid') + + numerical_settings.KMesh.points.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_kpoints', ['.k_points'])) + + +class KLinePath(numerical_settings.KLinePath): + + numerical_settings.KLinePath.high_symmetry_path_names.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.names') + + numerical_settings.KLinePath.high_symmetry_path_values.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.values') + class KSpace(numerical_settings.KSpace): numerical_settings.KSpace.k_mesh.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.k_mesh') + numerical_settings.KSpace.k_line_path.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_k_line_path', ['.k_line_path']), cache=True) class ModelMethod(model_method.ModelMethod): @@ -67,7 +104,9 @@ class Wannier(model_method.Wannier): model_method.Wannier.energy_window_outer.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.energy_windows.outer') - model_method.Wannier.n_orbitals.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.Wannier') + model_method.Wannier.energy_window_inner.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.energy_windows.inner') + + model_method.Wannier.n_orbitals.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.Nwannier') class ElectronicBandStructure(properties.ElectronicBandStructure): @@ -77,12 +116,56 @@ class ElectronicBandStructure(properties.ElectronicBandStructure): properties.ElectronicBandStructure.value.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_data', ['.data'])) +class WignerSeitz(variables.WignerSeitz): + + variables.WignerSeitz.n_points.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.n_ws_points') + + variables.WignerSeitz.points.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.ws_points') + + +class HoppingMatrix(properties.HoppingMatrix): + + properties.HoppingMatrix.n_orbitals.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='n_orbitals') + + properties.HoppingMatrix.degeneracy_factors.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.degeneracy_factors') + + # TODO shape mismatch + # properties.HoppingMatrix.value.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.hoppings', unit='eV') + + variables.WignerSeitz.m_def.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + +class CrystalFieldSplitting(properties.CrystalFieldSplitting): + + properties.CrystalFieldSplitting.n_orbitals.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='n_orbitals') + + properties.CrystalFieldSplitting.value.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.crystal_fields', unit='eV') + + +class Energy2(variables.Energy2): + + variables.Energy2.points.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.energies', unit='eV') + + +class ElectronicDensityOfStates(properties.ElectronicDensityOfStates): + + properties.ElectronicDensityOfStates.value.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.value', unit='1/eV') + + variables.Energy2.m_def.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + class Outputs(outputs.Outputs): outputs.Outputs.electronic_band_structures.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') outputs.Outputs.electronic_band_structures.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + outputs.Outputs.hopping_matrices.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_hoppings', ['.@'], dict(ws=True))) + + outputs.Outputs.crystal_field_splittings.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_hoppings', ['.@'])) + + outputs.Outputs.electronic_dos.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper=('get_dos', ['.data'])) + class Simulation(general.Simulation): @@ -98,6 +181,10 @@ class Simulation(general.Simulation): general.Simulation.outputs.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + general.Simulation.outputs.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + + general.Simulation.outputs.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper='.@') + Simulation.m_def.m_annotations[OUT_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') @@ -105,5 +192,9 @@ class Simulation(general.Simulation): Simulation.m_def.m_annotations[BAND_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') +Simulation.m_def.m_annotations[HR_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') + +Simulation.m_def.m_annotations[DOS_ANNOTATION_KEY] = MappingAnnotationModel(mapper='@') + m_package.__init_metainfo__()