Skip to content

Commit

Permalink
Added automatic connection with CASTEP entries
Browse files Browse the repository at this point in the history
Added nomad-lab[infrastructure] extra dependencies

Added workflow for NMR CASTEP or QE + magres entry
  • Loading branch information
JosePizarro3 committed Jul 29, 2024
1 parent b3dd0d3 commit 0d9ee41
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 662 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ dev = [
"pytest",
"pytest-timeout",
"pytest-cov",
"structlog"
"structlog",
"nomad-lab[infrastructure]", # for search and MetadataRequired to work
]

[tool.ruff]
Expand Down
143 changes: 102 additions & 41 deletions src/nomad_parser_magres/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
from nomad_simulations.schema_packages.model_system import Cell
from structlog.stdlib import BoundLogger

from nomad.app.v1.models import MetadataRequired
from nomad.config import config
from nomad.datamodel.metainfo.workflow import Link, TaskReference
from nomad.parsing.file_parser import Quantity, TextParser
from nomad.search import search
from nomad.units import ureg
from nomad.utils import extract_section
from nomad_simulations.schema_packages.atoms_state import AtomsState
from nomad_simulations.schema_packages.general import Program, Simulation
from nomad_simulations.schema_packages.model_method import (
Expand All @@ -28,10 +32,11 @@
Outputs,
SpinSpinCoupling,
)

# from nomad.app.v1.models import MetadataRequired
# from nomad.search import search
# from .utils import BeyondDFTWorkflowsParser
from nomad_parser_magres.schema_packages.workflow import (
NMRMagRes,
NMRMagResMethod,
NMRMagResResults,
)

re_float = r' *[-+]?\d+\.\d*(?:[Ee][-+]\d+)? *'

Expand Down Expand Up @@ -580,24 +585,77 @@ def parse_outputs(
magres_data=magres_data, cell=cell, logger=logger
)
if len(efg) > 0:
outputs.electric_field_gradients.append(efg)
outputs.electric_field_gradients = efg

# Parse `SpinSpinCoupling`
isc = self.parse_spin_spin_couplings(
magres_data=magres_data, cell=cell, logger=logger
)
if len(isc) > 0:
outputs.spin_spin_couplings.append(isc)
outputs.spin_spin_couplings = isc

# Parse `MagneticSusceptibility`
mag_sus = self.parse_magnetic_susceptibilities(
magres_data=magres_data, logger=logger
)
if len(mag_sus) > 0:
outputs.magnetic_susceptibilities.append(mag_sus)
outputs.magnetic_susceptibilities = mag_sus

return outputs

def parse_nmr_magres_file_format(
    self, nmr_first_principles_archive: 'EntryArchive'
):
    """
    Build the `NMRMagRes` workflow and store it in `self.archive.workflow2`.

    Here, `self.archive` is the NMR magres archive; the workflow written into it
    links back to the original NMR first principles (CASTEP or QuantumESPRESSO)
    entry. The workflow inputs and outputs are currently resolved from the magres
    archive itself (`data.model_system` and `data.outputs`). A task referencing
    the first principles workflow is only added when that archive defines
    `workflow2`.

    Args:
        nmr_first_principles_archive (EntryArchive): the NMR (first principles)
            CASTEP or QuantumESPRESSO archive.
    """
    workflow = NMRMagRes(method=NMRMagResMethod(), results=NMRMagResResults())
    workflow.name = 'NMR Magres'

    # ! Fix this once CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    # Method
    # method_nmr = extract_section(nmr_first_principles_archive, ['run', 'method'])
    # workflow.method.nmr_method_ref = method_nmr

    # Inputs and Outputs
    # ! Fix this to extract `input_structure` from `nmr_first_principles_archive` once
    # ! CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    # NOTE(review): `extract_section` presumably returns a falsy value when the
    # path cannot be resolved; the truthiness checks below rely on that.
    input_structure = extract_section(self.archive, ['data', 'model_system'])
    nmr_magres_calculation = extract_section(self.archive, ['data', 'outputs'])
    if input_structure:
        workflow.m_add_sub_section(
            NMRMagRes.inputs, Link(name='Input structure', section=input_structure)
        )
    if nmr_magres_calculation:
        workflow.m_add_sub_section(
            NMRMagRes.outputs,
            Link(name='Output NMR calculation', section=nmr_magres_calculation),
        )

    # NMR (first principles) task
    # ! Fix this once CASTEP and QuantumESPRESSO use the new `nomad-simulations` schema under 'data'
    first_principles_workflow = nmr_first_principles_archive.workflow2
    if first_principles_workflow:
        # The program name is only needed to label the task, so it is looked up
        # here rather than unconditionally: an archive without `workflow2` must
        # not prevent the magres workflow from being written just because its
        # legacy `run` section is empty or has no `program`.
        program_name = nmr_first_principles_archive.run[-1].program.name
        task = TaskReference(task=first_principles_workflow)
        task.name = f'NMR FirstPrinciples {program_name}'
        if input_structure:
            task.inputs = [Link(name='Input structure', section=input_structure)]
        if nmr_magres_calculation:
            task.outputs = [
                Link(
                    name='Output NMR calculation',
                    section=nmr_magres_calculation,
                )
            ]
        workflow.m_add_sub_section(NMRMagRes.tasks, task)

    self.archive.workflow2 = workflow

def parse(
self, filepath: str, archive: 'EntryArchive', logger: 'BoundLogger'
) -> None:
Expand Down Expand Up @@ -637,37 +695,40 @@ def parse(
if outputs is not None:
simulation.outputs.append(outputs)

# # We try to resolve the entry_id and mainfile of other entries in the upload
# filepath_stripped = self.filepath.split('raw/')[-1]
# metadata = []
# try:
# upload_id = self.archive.metadata.upload_id
# search_ids = search(
# owner='visible',
# user_id=self.archive.metadata.main_author.user_id,
# query={'upload_id': upload_id},
# required=MetadataRequired(include=['entry_id', 'mainfile']),
# ).data
# metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids]
# except Exception:
# self.logger.warning(
# 'Could not resolve the entry_id and mainfile of other entries in the upload.'
# )
# return
# for entry_id, mainfile in metadata:
# if mainfile == filepath_stripped: # we skip the current parsed mainfile
# continue
# # We try to load the archive from its context and connect both the CASTEP
# # and the magres entries
# try:
# entry_archive = archive.m_context.load_archive(
# entry_id, upload_id, None
# )
# method_label = entry_archive.run[-1].method[-1].label
# if method_label == 'NMR':
# castep_archive = entry_archive
# # We write the workflow NMRMagRes directly in the magres entry
# self.parse_nmr_magres_file_format(castep_archive)
# break
# except Exception:
# continue
# Try to resolve the `entry_id` and `mainfile` of other entries in the upload to connect the magres entry with the CASTEP or QuantumESPRESSO entry
filepath_stripped = self.mainfile.split('raw/')[-1]
metadata = []
try:
upload_id = self.archive.metadata.upload_id
search_ids = search(
owner='visible',
user_id=self.archive.metadata.main_author.user_id,
query={'upload_id': upload_id},
required=MetadataRequired(include=['entry_id', 'mainfile']),
).data
metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids]
except Exception:
logger.warning(
'Could not resolve the entry_id and mainfile of other entries in the upload.'
)
return
for entry_id, mainfile in metadata:
if mainfile == filepath_stripped: # we skip the current parsed mainfile
continue
# We try to load the archive from its context and connect both the CASTEP and the magres entries
# ? add more checks on the system information for the connection?
try:
entry_archive = self.archive.m_context.load_archive(
entry_id, upload_id, None
)
# ! Fix this when CASTEP parser uses the new `data` schema
method_label = entry_archive.run[-1].method[-1].label
if method_label == 'NMR':
castep_archive = entry_archive
# We write the workflow NMRMagRes directly in the magres entry
self.parse_nmr_magres_file_format(
nmr_first_principles_archive=castep_archive
)
break
except Exception:
continue
2 changes: 0 additions & 2 deletions src/nomad_parser_magres/parsers/utils/__init__.py

This file was deleted.

Loading

1 comment on commit 0d9ee41

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
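| File | Stmts | Miss | Cover | Missing |
| --- | --- | --- | --- | --- |
| **src/nomad_parser_magres** | | | | |
| `__init__.py` | 4 | 2 | 50% | 3–4 |
| `_version.py` | 11 | 2 | 82% | 5–6 |
| **src/nomad_parser_magres/parsers** | | | | |
| `__init__.py` | 8 | 1 | 88% | 11 |
| `parser.py` | 241 | 236 | 2% | 7–9, 12–734 |
| **src/nomad_parser_magres/schema_packages** | | | | |
| `__init__.py` | 8 | 2 | 75% | 9–11 |
| `package.py` | 102 | 102 | 0% | 1–393 |
| `workflow.py` | 12 | 12 | 0% | 1–43 |
| **TOTAL** | 386 | 357 | 8% | |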

Tests Skipped Failures Errors Time
1 0 💤 0 ❌ 1 🔥 16.450s ⏱️

Please sign in to comment.