diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 7ff4356a..7d8bcbac 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -39,6 +39,7 @@ from structlog.stdlib import (
     BoundLogger,
 )
 
+from nomad_measurements.xrd.nx import connect_concepts_to_hdf5
 
 
 def get_reference(upload_id: str, entry_id: str) -> str:
@@ -200,6 +201,23 @@ def __init__(
         self._hdf5_datasets = collections.OrderedDict()
         self._hdf5_attributes = collections.OrderedDict()
 
+    def add_group_and_return_child_group(
+        self, child_group_name, parent_group=None, nxclass=None
+    ):
+        """Create (or get) group ``child_group_name`` under ``parent_group``."""
+        # A name like 'ENTRY[entry]' carries the NX class before the bracket.
+        if (parts := child_group_name.split('[', 1)) and len(parts) > 1:
+            nxclass = parts[0]
+            grp_name_tmp = parts[1].split(']')[0]
+        else:
+            grp_name_tmp = child_group_name
+        # ``require_group`` creates the group if missing and returns it.
+        child_group = parent_group.require_group(grp_name_tmp)
+        if nxclass:
+            child_group.attrs['NX_class'] = 'NX' + nxclass.lower()
+
+        return child_group
+
     def add_dataset(  # noqa: PLR0913
         self,
         path: str,
@@ -236,10 +254,7 @@ def add_dataset(  # noqa: PLR0913
         dataset = dict(
             data=data,
             attrs={},
-            hdf5_path=(
-                f'/uploads/{self.archive.m_context.upload_id}/raw'
-                f'/{self.data_file}#{path}'
-            ),
+            hdf5_path=path,
             archive_path=archive_path,
             internal_reference=internal_reference,
         )
@@ -328,7 +343,15 @@ def _write_nx_file(self):
         """
         if self.data_file.endswith('.h5'):
             self.data_file = self.data_file.replace('.h5', '.nxs')
-        raise NotImplementedError('Method `write_nx_file` is not implemented.')
+
+        # ``raw_file`` resolves the on-disk path inside the upload; append
+        # mode ('a') creates the file on first use without truncating it.
+        # NOTE: 'rw' is not a valid mode for either open() or h5py.File.
+        with self.archive.m_context.raw_file(self.data_file, 'a') as h5_file:
+            with h5py.File(h5_file.name, 'a') as h5:
+                connect_concepts_to_hdf5(
+                    archive=self.archive, hdf5handler=self, h5_root=h5
+                )
 
         # TODO add archive data to `hdf5_data_dict` before creating the nexus file.
         # Use # `populate_hdf5_data_dict` method for each quantity that is needed in .nxs
         # file. Create a NeXus file with the data in `hdf5_data_dict`.
diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
index db73af2f..14c76877 100644
--- a/src/nomad_measurements/xrd/nx.py
+++ b/src/nomad_measurements/xrd/nx.py
@@ -16,6 +16,17 @@
 # limitations under the License.
 #
 
+from typing import TYPE_CHECKING
+
+from pynxtools.nomad.dataconverter import populate_nexus_subsection
+
+if TYPE_CHECKING:
+    import h5py as h5
+
+    from nomad.datamodel.datamodel import EntryArchive
+    from nomad_measurements.utils import HDF5Handler
+
+
 NEXUS_DATASET_PATHS = [
     '/ENTRY[entry]/experiment_result/intensity',
     '/ENTRY[entry]/experiment_result/two_theta',
@@ -37,3 +49,97 @@
     '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
     '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
 ]
+
+
+# Maps NeXus template paths onto dot-separated archive attribute chains.
+# Values that do not start with 'archive.' are written to the file verbatim.
+CONCEPT_MAP = {
+    '/ENTRY[entry]/method': 'archive.data.method',
+    '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis',
+    '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current/@units': 'archive.data.xrd_settings.source.xray_tube_current.units',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage/@units': 'archive.data.xrd_settings.source.xray_tube_voltage.units',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one/@units': 'archive.data.xrd_settings.source.kalpha_one.units',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two/@units': 'archive.data.xrd_settings.source.kalpha_two.units',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta.magnitude',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta/@units': 'archive.data.xrd_settings.source.kbeta.units',
+    '/ENTRY[entry]/@default': 'experiment_result',
+    '/ENTRY[entry]/experiment_result/@signal': 'intensity',
+    '/ENTRY[entry]/definition': 'NXxrd_pan',
+}
+
+
+def walk_through_object(parent_obj, attr_chain, default=None):
+    """
+    Walk through `parent_obj` along a dot-separated attribute chain.
+
+    Args:
+        parent_obj: The python object to start from.
+        attr_chain: Dot-separated attribute chain, e.g.
+            'data.results[0].scan_axis'.
+        default: A value to be returned by default, if no data is found.
+    """
+    if parent_obj is None or not isinstance(attr_chain, str):
+        return default
+    child_nm, _, rest_part = attr_chain.partition('.')
+    if '[' in child_nm:
+        child_nm, index = child_nm.split('[')
+        # Indexed access: the section list is assumed to exist.
+        child_obj = getattr(parent_obj, child_nm)[int(index[:-1])]
+    else:
+        child_obj = getattr(parent_obj, child_nm, default)
+    if not rest_part:
+        return child_obj
+    return walk_through_object(child_obj, rest_part, default=default)
+
+
+def connect_concepts_to_hdf5(
+    archive: 'EntryArchive', hdf5handler: 'HDF5Handler', h5_root: 'h5.Group'
+):
+    """
+    Connect concepts between `ELNXrayDiffraction` and the `NXxrd_pan` schema.
+
+    Args:
+        archive (EntryArchive): Nomad archive containing the sections,
+            subsections and quantities to be copied.
+        hdf5handler (HDF5Handler): Handler collecting datasets and attributes.
+        h5_root (h5.Group): Root group of the target HDF5/NeXus file.
+    """
+    # ruff: noqa: E501
+    for key, archive_concept in CONCEPT_MAP.items():
+        if isinstance(archive_concept, str) and archive_concept.startswith('archive.'):
+            _, arch_attr = archive_concept.split('.', 1)
+            try:
+                value = walk_through_object(archive, arch_attr)
+            # Use multiple excepts to avoid catching all exceptions
+            except (AttributeError, IndexError, KeyError, ValueError):
+                value = None
+        else:
+            # Values not referencing the archive are written verbatim.
+            value = archive_concept
+        if value is None:
+            continue
+
+        # Drop the empty segment produced by the leading '/'.
+        key_parts = [part for part in key.split('/') if part]
+        if key_parts[-1].startswith('@'):
+            # Attributes live on their parent dataset/group: strip both the
+            # attribute name and the object carrying it.
+            group_parts = key_parts[:-2]
+            hdf5handler.add_attribute(path=key, data=value)
+        else:
+            group_parts = key_parts[:-1]
+            hdf5handler.add_dataset(path=key, data=value)
+
+        # Make sure the full group hierarchy exists in the file.
+        parent = h5_root
+        for nx_concept in group_parts:
+            parent = hdf5handler.add_group_and_return_child_group(
+                child_group_name=nx_concept, parent_group=parent
+            )