Skip to content

Commit

Permalink
Adding nexus in ref (#150)
Browse files Browse the repository at this point in the history
* Remove the Nexus file before regenerating it.


* Reference to the NeXus entry.

* PR review comments.
  • Loading branch information
RubelMozumder authored Dec 20, 2024
1 parent 19dec87 commit 90d07a1
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 10 deletions.
146 changes: 136 additions & 10 deletions src/nomad_measurements/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# limitations under the License.
#
import collections
import copy
import os.path
import re
from typing import (
Expand All @@ -30,6 +31,14 @@
from nomad.datamodel.hdf5 import HDF5Reference
from nomad.units import ureg
from pydantic import BaseModel, Field
from pynxtools.dataconverter.helpers import (
generate_template_from_nxdl,
get_nxdl_root_and_path,
)
from pynxtools.dataconverter.template import Template
from pynxtools.dataconverter.writer import Writer as pynxtools_writer

from nomad_measurements.xrd.nx import CONCEPT_MAP

if TYPE_CHECKING:
from nomad.datamodel.data import (
Expand All @@ -43,6 +52,10 @@
)


class NXFileGenerationError(Exception):
    """Exception type for failures during NeXus file generation (per its name)."""


def get_reference(upload_id: str, entry_id: str) -> str:
    """
    Build the relative reference string to the `data` section of an entry.

    Args:
        upload_id: Identifier placed after `../uploads/`.
        entry_id: Identifier placed after `/archive/`.

    Returns:
        The string `../uploads/<upload_id>/archive/<entry_id>#data`.
    """
    reference_pattern = '../uploads/{}/archive/{}#data'
    return reference_pattern.format(upload_id, entry_id)

Expand Down Expand Up @@ -334,9 +347,13 @@ def write_file(self):
except Exception as e:
self.nexus = False
self.logger.warning(
f'Encountered "{e}" error while creating nexus file. '
'Creating h5 file instead.'
f"""NeXusFileGenerationError: Encountered '{e}' error while creating
nexus file. Creating h5 file instead."""
)
if self.archive.m_context.raw_path_exists(self.data_file):
os.remove(
os.path.join(self.archive.m_context.raw_path(), self.data_file)
)
self._write_hdf5_file()
else:
self._write_hdf5_file()
Expand All @@ -347,14 +364,62 @@ def _write_nx_file(self):
to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus
view of the data in addition to storing array data.
"""
if self.data_file.endswith('.h5'):
self.data_file = self.data_file.replace('.h5', '.nxs')
raise NotImplementedError('Method `write_nx_file` is not implemented.')
# TODO add archive data to `hdf5_data_dict` before creating the nexus file. Use
# `populate_hdf5_data_dict` method for each quantity that is needed in .nxs
# file. Create a NeXus file with the data in `hdf5_data_dict`.
# One issue here is as we populate the `hdf5_data_dict` with the archive data,
# we will always have to over write the nexus file
from nomad.processing.data import Entry

app_def = 'NXxrd_pan'
nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def)
template = Template()
generate_template_from_nxdl(nxdl_root, template)
attr_dict = {}
dataset_dict = {}
self.populate_nx_dataset_and_attribute(
attr_dict=attr_dict, dataset_dict=dataset_dict
)
for nx_path, dset_original in list(self._hdf5_datasets.items()) + list(
dataset_dict.items()
):
dset = copy.deepcopy(dset_original)
if dset.internal_reference:
# convert to the nexus type link
dset.data = {'link': self._remove_nexus_annotations(dset.data)}

try:
template[nx_path] = dset.data
except KeyError:
template['optional'][nx_path] = dset.data

hdf5_path = self._remove_nexus_annotations(nx_path)
self._set_hdf5_reference(
self.archive,
dset.archive_path,
f'/uploads/{self.archive.m_context.upload_id}/raw'
f'/{self.data_file}#{hdf5_path}',
)
for nx_path, attr_d in list(self._hdf5_attributes.items()) + list(
attr_dict.items()
):
for attr_k, attr_v in attr_d.items():
if attr_v != 'dimensionless' and attr_v:
try:
template[f'{nx_path}/@{attr_k}'] = attr_v
except KeyError:
template['optional'][f'{nx_path}/@{attr_k}'] = attr_v

nx_full_file_path = os.path.join(
self.archive.m_context.raw_path(), self.data_file
)

if self.archive.m_context.raw_path_exists(self.data_file):
os.remove(nx_full_file_path)
pynxtools_writer(
data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path
).write()

entry_list = Entry.objects(
upload_id=self.archive.m_context.upload_id, mainfile=self.data_file
)
if not entry_list:
self.archive.m_context.process_updated_raw_file(self.data_file)

def _write_hdf5_file(self): # noqa: PLR0912
"""
Expand Down Expand Up @@ -422,6 +487,67 @@ def _write_hdf5_file(self): # noqa: PLR0912
self._hdf5_datasets = collections.OrderedDict()
self._hdf5_attributes = collections.OrderedDict()

@staticmethod
def walk_through_object(parent_obj, attr_chain):
    """
    Walk through an object along a dotted attribute chain and return the leaf.

    Args:
        parent_obj: Object the walk starts from, e.g. an archive.
        attr_chain: Dot-separated attribute chain that must start with
            `archive.`, e.g.
            'archive.data.xrd_settings.source.xray_tube_material'.
            The leading `archive` part stands for `parent_obj` itself and is
            skipped. A part may carry an integer index, e.g. `results[0]`.

    Returns:
        The object found at the end of the chain. `None` is returned when
        `parent_obj` is `None`, when `attr_chain` is not a string starting with
        `archive.`, or when any step of the walk yields `None` (including a
        missing attribute on a non-indexed part).

    Raises:
        AttributeError, IndexError, TypeError, ValueError: An indexed part
            (`name[i]`) is not guarded; a missing attribute, an out-of-range
            index, a non-subscriptable value or a malformed index propagates.
    """
    if parent_obj is None:
        return None

    # Anything that is not an `archive.`-prefixed string cannot be resolved;
    # return None explicitly instead of relying on an implicit fall-through.
    if not (isinstance(attr_chain, str) and attr_chain.startswith('archive.')):
        return None

    current = parent_obj
    # parts[0] is the literal 'archive', i.e. `parent_obj` itself.
    for part in attr_chain.split('.')[1:]:
        if '[' in part:
            name, index_text = part.split('[')
            # Drop the closing ']' before converting the index.
            index = int(index_text[:-1])
            # The indexed section is expected to exist; errors propagate.
            current = getattr(current, name)[index]
        else:
            current = getattr(current, part, None)
        if current is None:
            return None

    return current

def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
    """
    Collect the NeXus datasets and attributes described by `CONCEPT_MAP`.

    For every `CONCEPT_MAP` item, a value starting with `archive.` is resolved
    against `self.archive` with `walk_through_object`; any other value is used
    as a literal default. Both dictionaries are updated in place.

    Args:
        attr_dict: Receives `{nx_path: {attribute_name: value}}`. Entries for
            a path that already exists are merged, not replaced.
        dataset_dict: Receives `{nx_path: DatasetModel}` for every path whose
            last component is not an `@attribute`.
    """
    for nx_path, arch_path in CONCEPT_MAP.items():
        if arch_path.startswith('archive.'):
            data = self.walk_through_object(self.archive, arch_path)
        else:
            data = arch_path  # literal default value

        dataset = DatasetModel(
            data=data,
        )

        if isinstance(data, pint.Quantity):
            units = str(data.units)
            if units and units != 'dimensionless':
                # Store the units as an attribute and keep only the magnitude
                # in the dataset. Merge so other attributes on the same path
                # are preserved.
                attr_dict.setdefault(nx_path, {})['units'] = units
                dataset.data = data.magnitude

        # Split at the LAST '/', so that '/ENTRY[entry]/@default' yields the
        # parent '/ENTRY[entry]' and the leaf '@default'. Splitting at the
        # first '/' leaves an empty parent for every absolute path, so the
        # attribute branch would never be taken.
        parent_path, _, leaf = nx_path.rpartition('/')
        if leaf.startswith('@'):
            attr_dict.setdefault(parent_path, {})[leaf[1:]] = data
        else:
            dataset_dict[nx_path] = dataset

@staticmethod
def _remove_nexus_annotations(path: str) -> str:
"""
Expand Down
33 changes: 33 additions & 0 deletions src/nomad_measurements/xrd/nx.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,36 @@
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
]


# Shared path prefixes, factored out to keep the mapping below readable.
_NX_ENTRY = '/ENTRY[entry]'
_NX_INSTRUMENT = f'{_NX_ENTRY}/INSTRUMENT[instrument]'
_NX_SOURCE = f'{_NX_INSTRUMENT}/SOURCE[source]'
_ARCHIVE_DATA = 'archive.data'
_ARCHIVE_SOURCE = f'{_ARCHIVE_DATA}.xrd_settings.source'

# Maps a NeXus template path (key) to the origin of its value. A value that
# starts with 'archive.' is a dotted attribute chain to be looked up in the
# archive; any other value is used as a literal default.
CONCEPT_MAP = {
    f'{_NX_ENTRY}/@default': 'experiment_result',
    f'{_NX_ENTRY}/definition': 'NXxrd_pan',
    f'{_NX_ENTRY}/method': f'{_ARCHIVE_DATA}.method',
    f'{_NX_ENTRY}/measurement_type': f'{_ARCHIVE_DATA}.diffraction_method_name',
    f'{_NX_ENTRY}/experiment_result/@signal': 'intensity',
    f'{_NX_INSTRUMENT}/DETECTOR[detector]/scan_axis': (
        f'{_ARCHIVE_DATA}.results[0].scan_axis'
    ),
    f'{_NX_SOURCE}/xray_tube_material': f'{_ARCHIVE_SOURCE}.xray_tube_material',
    f'{_NX_SOURCE}/xray_tube_current': f'{_ARCHIVE_SOURCE}.xray_tube_current',
    f'{_NX_SOURCE}/xray_tube_voltage': f'{_ARCHIVE_SOURCE}.xray_tube_voltage',
    f'{_NX_SOURCE}/k_alpha_one': f'{_ARCHIVE_SOURCE}.kalpha_one',
    f'{_NX_SOURCE}/k_alpha_two': f'{_ARCHIVE_SOURCE}.kalpha_two',
    f'{_NX_SOURCE}/ratio_k_alphatwo_k_alphaone': (
        f'{_ARCHIVE_SOURCE}.ratio_kalphatwo_kalphaone'
    ),
    f'{_NX_SOURCE}/kbeta': f'{_ARCHIVE_SOURCE}.kbeta',
}
20 changes: 20 additions & 0 deletions src/nomad_measurements/xrd/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -73,6 +74,8 @@
from nomad_measurements.utils import (
HDF5Handler,
get_bounding_range_2d,
get_entry_id_from_file_name,
get_reference,
merge_sections,
)
from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS
Expand Down Expand Up @@ -1137,6 +1140,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
diffraction_method_name.m_annotations['eln'] = ELNAnnotation(
component=ELNComponentEnum.EnumEditQuantity,
)
nexus_results = Quantity(
type=ArchiveSection,
description='Reference to the NeXus entry.',
a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity),
)

def get_read_write_functions(self) -> tuple[Callable, Callable]:
"""
Expand Down Expand Up @@ -1300,6 +1308,18 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
self.hdf5_handler.write_file()
if self.hdf5_handler.data_file != self.auxiliary_file:
self.auxiliary_file = self.hdf5_handler.data_file

if archive.m_context.raw_path_exists(
self.auxiliary_file
) and self.auxiliary_file.endswith('.nxs'):
nx_entry_id = get_entry_id_from_file_name(
archive=archive, file_name=self.auxiliary_file
)
ref_to_nx_entry_data = get_reference(
archive.metadata.upload_id, nx_entry_id
)
self.nexus_results = f'{ref_to_nx_entry_data}'

super().normalize(archive, logger)


Expand Down

0 comments on commit 90d07a1

Please sign in to comment.