From 898e3fe8e8b2e914a2e2c4e9c4ee75bdea28dbd3 Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Mon, 19 Aug 2024 15:45:37 +0200 Subject: [PATCH 01/41] updated plugin structure --- MANIFEST.in | 1 + src/nomad_measurements/general/__init__.py | 15 +++++++++++++++ .../{__init__.py => general/schema.py} | 0 3 files changed, 16 insertions(+) create mode 100644 src/nomad_measurements/general/__init__.py rename src/nomad_measurements/{__init__.py => general/schema.py} (100%) diff --git a/MANIFEST.in b/MANIFEST.in index e69de29b..b5ccc2d3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include * nomad_plugin.yaml diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py new file mode 100644 index 00000000..7e990a5d --- /dev/null +++ b/src/nomad_measurements/general/__init__.py @@ -0,0 +1,15 @@ + +from nomad.config.models.plugins import SchemaPackageEntryPoint + +class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint): + + def load(self): + from nomad_measurements.general.schema import m_package + + return m_package + + +general_schema = GeneralSchemaPackageEntryPoint( + name='GeneralSchema', + description='Schema package defined using the new plugin mechanism.', +) diff --git a/src/nomad_measurements/__init__.py b/src/nomad_measurements/general/schema.py similarity index 100% rename from src/nomad_measurements/__init__.py rename to src/nomad_measurements/general/schema.py From 2f81aa5ed4958ac80b32538cd9f9abb8416e343e Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Mon, 19 Aug 2024 16:01:02 +0200 Subject: [PATCH 02/41] added pynxtools dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a9392d6..cbb1a390 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,10 @@ maintainers = [ ] license = { file = "LICENSE" } dependencies = [ - "nomad-lab>=1.3.6", + "nomad-lab>=1.3.6", "xmltodict==0.13.0", 
"fairmat-readers-xrd>=0.0.3", + "pynxtools@git+https://github.com/FAIRmat-NFDI/pynxtools.git@master", "nomad-material-processing", "fairmat-readers-transmission", ] From 103076bd955fce936ab2c7db49bd6f6c7b4bd7fa Mon Sep 17 00:00:00 2001 From: Andrea Albino <95371554+aalbino2@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:51:52 +0200 Subject: [PATCH 03/41] Apply suggestions from Sarthak's code review Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> --- src/nomad_measurements/general/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py index 7e990a5d..6b2a9d9c 100644 --- a/src/nomad_measurements/general/__init__.py +++ b/src/nomad_measurements/general/__init__.py @@ -9,7 +9,7 @@ def load(self): return m_package -general_schema = GeneralSchemaPackageEntryPoint( +schema = GeneralSchemaPackageEntryPoint( name='GeneralSchema', description='Schema package defined using the new plugin mechanism.', ) From a42a70745b927a1a121ed74588f819ed0e92ff56 Mon Sep 17 00:00:00 2001 From: aalbino2 Date: Tue, 20 Aug 2024 14:25:26 +0200 Subject: [PATCH 04/41] ruff linting --- src/nomad_measurements/general/__init__.py | 1 + src/nomad_measurements/xrd/__init__.py | 1 + src/nomad_measurements/xrd/schema.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py index 6b2a9d9c..d547c857 100644 --- a/src/nomad_measurements/general/__init__.py +++ b/src/nomad_measurements/general/__init__.py @@ -1,6 +1,7 @@ from nomad.config.models.plugins import SchemaPackageEntryPoint + class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint): def load(self): diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py index a088a3d4..be4f237f 100644 --- a/src/nomad_measurements/xrd/__init__.py +++ b/src/nomad_measurements/xrd/__init__.py @@ -1,6 
+1,7 @@ from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint + class XRDSchemaPackageEntryPoint(SchemaPackageEntryPoint): def load(self): from nomad_measurements.xrd.schema import m_package diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index c6a2e3df..d12fb8eb 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -19,6 +19,7 @@ TYPE_CHECKING, Any, Callable, + Dict, ) import numpy as np From 48be124ee5dbaa0b7880758e422a9ff2684348c8 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 20 Aug 2024 15:31:31 +0200 Subject: [PATCH 05/41] Ruff linting 2 --- src/nomad_measurements/general/__init__.py | 2 -- src/nomad_measurements/xrd/__init__.py | 1 - src/nomad_measurements/xrd/schema.py | 1 - 3 files changed, 4 deletions(-) diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py index d547c857..853e1803 100644 --- a/src/nomad_measurements/general/__init__.py +++ b/src/nomad_measurements/general/__init__.py @@ -1,9 +1,7 @@ - from nomad.config.models.plugins import SchemaPackageEntryPoint class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint): - def load(self): from nomad_measurements.general.schema import m_package diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py index be4f237f..a088a3d4 100644 --- a/src/nomad_measurements/xrd/__init__.py +++ b/src/nomad_measurements/xrd/__init__.py @@ -1,7 +1,6 @@ from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint - class XRDSchemaPackageEntryPoint(SchemaPackageEntryPoint): def load(self): from nomad_measurements.xrd.schema import m_package diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index d12fb8eb..c6a2e3df 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -19,7 +19,6 @@ TYPE_CHECKING, Any, Callable, - Dict, 
) import numpy as np From 282c48f3ebd57f76e5e373a48beb701f0759347c Mon Sep 17 00:00:00 2001 From: Andrea Albino <95371554+aalbino2@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:07:30 +0200 Subject: [PATCH 06/41] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> Co-authored-by: Hampus Näsström --- src/nomad_measurements/general/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py index 853e1803..621f7b0c 100644 --- a/src/nomad_measurements/general/__init__.py +++ b/src/nomad_measurements/general/__init__.py @@ -9,6 +9,6 @@ def load(self): schema = GeneralSchemaPackageEntryPoint( - name='GeneralSchema', + name='General Schema', description='Schema package defined using the new plugin mechanism.', ) From bc29a436c1cffb386dd3880d6e90d31526a987fb Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Wed, 21 Aug 2024 09:36:58 +0200 Subject: [PATCH 07/41] changed xrd parser folder --- src/nomad_measurements/xrd/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py index a088a3d4..3ecf893f 100644 --- a/src/nomad_measurements/xrd/__init__.py +++ b/src/nomad_measurements/xrd/__init__.py @@ -27,3 +27,18 @@ def load(self): mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$', mainfile_mime_re='text/.*|application/zip', ) + + +class XRDParserEntryPoint(ParserEntryPoint): + def load(self): + from nomad_measurements.xrd.parser import XRDParser + + return XRDParser(**self.dict()) + + +parser = XRDParserEntryPoint( + name='XRD Parser', + description='Parser defined using the new plugin mechanism.', + mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$', + mainfile_mime_re='text/.*|application/zip', +) From 
581a77dbafc0e9d0092c5b5452407ce8aab7daa1 Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Wed, 21 Aug 2024 10:00:23 +0200 Subject: [PATCH 08/41] last fixes and descriptions --- MANIFEST.in | 1 - src/nomad_measurements/xrd/__init__.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index b5ccc2d3..e69de29b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +0,0 @@ -recursive-include * nomad_plugin.yaml diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py index 3ecf893f..86e95635 100644 --- a/src/nomad_measurements/xrd/__init__.py +++ b/src/nomad_measurements/xrd/__init__.py @@ -38,7 +38,7 @@ def load(self): parser = XRDParserEntryPoint( name='XRD Parser', - description='Parser defined using the new plugin mechanism.', + description='Parser for several kinds of raw files from XRD measurements.', mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$', mainfile_mime_re='text/.*|application/zip', ) From cdc749d1035b2ef9ee8c8b0d51e06fc4fa010ee9 Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Wed, 21 Aug 2024 10:13:34 +0200 Subject: [PATCH 09/41] description of general schema --- src/nomad_measurements/general/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py index 621f7b0c..bcfec234 100644 --- a/src/nomad_measurements/general/__init__.py +++ b/src/nomad_measurements/general/__init__.py @@ -10,5 +10,5 @@ def load(self): schema = GeneralSchemaPackageEntryPoint( name='General Schema', - description='Schema package defined using the new plugin mechanism.', + description='Schema package containing basic classes used around in the plugin.', ) From ce2a3640375d004f604decf1a691a43317e2fe81 Mon Sep 17 00:00:00 2001 From: Andrea Albino Date: Wed, 21 Aug 2024 16:08:10 +0200 Subject: [PATCH 10/41] changed general package into a module --- .../{general/schema.py => __init__.py} | 0 
src/nomad_measurements/general/__init__.py | 14 -------------- 2 files changed, 14 deletions(-) rename src/nomad_measurements/{general/schema.py => __init__.py} (100%) delete mode 100644 src/nomad_measurements/general/__init__.py diff --git a/src/nomad_measurements/general/schema.py b/src/nomad_measurements/__init__.py similarity index 100% rename from src/nomad_measurements/general/schema.py rename to src/nomad_measurements/__init__.py diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py deleted file mode 100644 index bcfec234..00000000 --- a/src/nomad_measurements/general/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from nomad.config.models.plugins import SchemaPackageEntryPoint - - -class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint): - def load(self): - from nomad_measurements.general.schema import m_package - - return m_package - - -schema = GeneralSchemaPackageEntryPoint( - name='General Schema', - description='Schema package containing basic classes used around in the plugin.', -) From dfb04b94936b3abf12b1c008f9ca33f27da309f0 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 11 Jun 2024 15:55:06 +0200 Subject: [PATCH 11/41] Implement write nexus section based on the populated nomad archive --- src/nomad_measurements/xrd/schema.py | 167 +++++---------------------- 1 file changed, 30 insertions(+), 137 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index c6a2e3df..a5c7cb14 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -78,6 +78,10 @@ from structlog.stdlib import ( BoundLogger, ) + import pint + +from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection +from pynxtools import dataconverter from nomad.config import config @@ -86,48 +90,6 @@ m_package = SchemaPackage(aliases=['nomad_measurements.xrd.parser.parser']) -def populate_nexus_subsection(**kwargs): - raise 
NotImplementedError - - -def handle_nexus_subsection( - xrd_template: 'Template', - nexus_out: str, - archive: 'EntryArchive', - logger: 'BoundLogger', -): - """ - Function for populating the NeXus section from the xrd_template. - - Args: - xrd_template (Template): The xrd data in a NeXus Template. - nexus_out (str): The name of the optional NeXus output file. - archive (EntryArchive): The archive containing the section. - logger (BoundLogger): A structlog logger. - """ - nxdl_name = 'NXxrd_pan' - if nexus_out: - if not nexus_out.endswith('.nxs'): - nexus_out = nexus_out + '.nxs' - populate_nexus_subsection( - template=xrd_template, - app_def=nxdl_name, - archive=archive, - logger=logger, - output_file_path=nexus_out, - on_temp_file=False, - ) - else: - populate_nexus_subsection( - template=xrd_template, - app_def=nxdl_name, - archive=archive, - logger=logger, - output_file_path=nexus_out, - on_temp_file=True, - ) - - def calculate_two_theta_or_q( wavelength: 'pint.Quantity', q: 'pint.Quantity' = None, @@ -878,7 +840,6 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): label='X-Ray Diffraction (XRD)', a_eln=ELNAnnotation( lane_width='800px', - hide=['generate_nexus_file'], ), a_template={ 'measurement_identifiers': {}, @@ -996,108 +957,39 @@ def write_xrd_data( ) merge_sections(self, xrd, logger) - def write_nx_xrd( - self, - xrd_dict: 'Template', - archive: 'EntryArchive', - logger: 'BoundLogger', - ) -> None: + def write_nx_section_and_create_file( + self, archive: 'EntryArchive', logger: 'BoundLogger' + ): """ - Populate `ELNXRayDiffraction` section from a NeXus Template. + Uses the archive to generate the NeXus section and .nxs file. Args: - xrd_dict (Dict[str, Any]): A dictionary with the XRD data. archive (EntryArchive): The archive containing the section. logger (BoundLogger): A structlog logger. 
""" - # TODO add the result section based on the scan_type - result = XRDResult( - intensity=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/intensity', - None, - ), - two_theta=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/two_theta', - None, - ), - omega=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/omega', - None, - ), - chi=xrd_dict.get('/ENTRY[entry]/2theta_plot/chi', None), - phi=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/phi', - None, - ), - scan_axis=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', - None, - ), - integration_time=xrd_dict.get( - '/ENTRY[entry]/COLLECTION[collection]/count_time', None - ), - ) - result.normalize(archive, logger) - - source = XRayTubeSource( - xray_tube_material=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', - None, - ), - kalpha_one=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', - None, - ), - kalpha_two=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', - None, - ), - ratio_kalphatwo_kalphaone=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', - None, - ), - kbeta=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', - None, - ), - xray_tube_voltage=xrd_dict.get( - 'ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', - None, - ), - xray_tube_current=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', - None, - ), - ) - source.normalize(archive, logger) - - xrd_settings = XRDSettings(source=source) - xrd_settings.normalize(archive, logger) - - sample = CompositeSystemReference( - lab_id=xrd_dict.get( - '/ENTRY[entry]/SAMPLE[sample]/sample_id', - None, - ), - ) - sample.normalize(archive, logger) - - xrd = ELNXRayDiffraction( - results=[result], - xrd_settings=xrd_settings, - samples=[sample], + nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan') 
+ template = dataconverter.template.Template() + dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template) + + template['/ENTRY[entry]/2theta_plot/intensity'] = archive.data.results[ + 0 + ].intensity.magnitude + template['/ENTRY[entry]/2theta_plot/two_theta'] = archive.data.results[ + 0 + ].two_theta.magnitude + template['/ENTRY[entry]/2theta_plot/two_theta/@units'] = str( + archive.data.results[0].two_theta.units ) - merge_sections(self, xrd, logger) + archive_name = archive.metadata.mainfile.split('.')[0] + nexus_output = f'{archive_name}_output.nxs' - nexus_output = None - if self.generate_nexus_file: - archive_name = archive.metadata.mainfile.split('.')[0] - nexus_output = f'{archive_name}_output.nxs' - handle_nexus_subsection( - xrd_dict, - nexus_output, - archive, - logger, + populate_nexus_subsection( + template=template, + app_def='NXxrd_pan', + archive=archive, + logger=logger, + output_file_path=nexus_output, + on_temp_file=self.generate_nexus_file, ) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): @@ -1122,6 +1014,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) if not self.results: return + self.write_nx_section_and_create_file(archive, logger) self.figures = self.results[0].generate_plots(archive, logger) From 78fe74a7f262e324f57c7f35efedeab6b708c21b Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 27 Aug 2024 11:41:26 +0200 Subject: [PATCH 12/41] Update path of populate_nexus_subsection --- src/nomad_measurements/xrd/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index a5c7cb14..144a9ed7 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -80,7 +80,7 @@ ) import pint -from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection +from pynxtools.nomad.dataconverter import 
populate_nexus_subsection from pynxtools import dataconverter from nomad.config import config From 380d21af0cdeed36f64e9f3462d031998d9df6d5 Mon Sep 17 00:00:00 2001 From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:57:18 +0200 Subject: [PATCH 13/41] app def missing. (#108) * Implement write nexus section based on the populated nomad archive * app def missing. * mapping nomad_measurement. * All concept are connected, creates nexus file and subsection. * adding links in hdf5 file. * Remove the nxs file. * back to the previous design. * Include pynxtools plugins in nomad.yaml and extend dependencies including pynxtools ans pnxtools-xrd. * PR review correction. * Remove the entry_type overwtitten. * Remove comments. * Replace __str__ function. * RUFF * Update pyproject.toml Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> * Update src/nomad_measurements/xrd/schema.py Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> * Update src/nomad_measurements/xrd/nx.py * Replace Try-block. --------- Co-authored-by: Sarthak Kapoor Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> --- src/nomad_measurements/xrd/nx.py | 182 +++++++++++++++++++++++++++ src/nomad_measurements/xrd/schema.py | 47 ++----- 2 files changed, 189 insertions(+), 40 deletions(-) create mode 100644 src/nomad_measurements/xrd/nx.py diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py new file mode 100644 index 00000000..e1b41fcf --- /dev/null +++ b/src/nomad_measurements/xrd/nx.py @@ -0,0 +1,182 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from typing import TYPE_CHECKING + +from pynxtools import dataconverter +from pynxtools.nomad.dataconverter import populate_nexus_subsection + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import ( + BoundLogger, + ) + + +def walk_through_object(parent_obj, attr_chain, default=None): + """ + Walk though the object until reach the leaf. + + Args: + parent_obj: This is a python obj. + attr_chain: Dot separated obj chain. + default: A value to be returned by default, if not data is found. + """ + expected_parts = 2 + if isinstance(attr_chain, str): + parts = attr_chain.split('.', 1) + + if len(parts) == expected_parts: + child_nm, rest_part = parts + if '[' in child_nm: + child_nm, index = child_nm.split('[') + index = int(index[:-1]) + child_obj = getattr(parent_obj, child_nm)[index] + else: + child_obj = getattr(parent_obj, child_nm) + return walk_through_object(child_obj, rest_part, default=default) + else: + return getattr(parent_obj, attr_chain, default) + + +def connect_concepts(template, archive: 'EntryArchive', scan_type: str): # noqa: PLR0912 + """ + Connect the concepts between `ELNXrayDiffraction` and `NXxrd_pan` schema. + + Args: + template (Template): The pynxtools template, a inherited class from python dict. + archive (EntryArchive): Nomad archive contains secttions, subsections and + quantities. + scan_type (str): Name of the scan type such as line and RSM. 
+ """ + + # General concepts + # ruff: noqa: E501 + concept_map = { + '/ENTRY[entry]/method': 'archive.data.method', + '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', + '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity.magnitude', + '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta.magnitude', + '/ENTRY[entry]/experiment_result/two_theta/@units': 'archive.data.results[0].two_theta.units', + '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega.magnitude', + '/ENTRY[entry]/experiment_result/omega/@units': 'archive.data.results[0].omega.units', + '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi.magnitude', + '/ENTRY[entry]/experiment_result/chi/@units': 'archive.data.results[0].chi.units', + '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi.magnitude', + '/ENTRY[entry]/experiment_result/phi/@units': 'archive.data.results[0].phi.units', + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis', + '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time.magnitude', + 'line': '', # For future implementation + 'rsm': { + '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel', + '/ENTRY[entry]/experiment_result/q_parallel/@units': 'archive.data.results[0].q_parallel.units', + '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular.magnitude', + '/ENTRY[entry]/experiment_result/q_perpendicular/@units': 'archive.data.results[0].q_perpendicular.units', + '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm.magnitude', + '/ENTRY[entry]/experiment_result/q_norm/@units': 'archive.data.results[0].q_norm.units', + }, + # Source + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material', + 
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current.magnitude', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current/@units': 'archive.data.xrd_settings.source.xray_tube_current.units', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage.magnitude', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage/@units': 'archive.data.xrd_settings.source.xray_tube_voltage.units', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one.magnitude', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one/@units': 'archive.data.xrd_settings.source.kalpha_one.units', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two.magnitude', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two/@units': 'archive.data.xrd_settings.source.kalpha_two.units', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta.magnitude', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta/@units': 'archive.data.xrd_settings.source.kbeta.units', + } + + for key, archive_concept in concept_map.items(): + if isinstance(archive_concept, dict): + if key == scan_type: + for sub_key, sub_archive_concept in archive_concept.items(): + _, arch_attr = sub_archive_concept.split('.', 1) + value = None + try: + value = walk_through_object(archive, arch_attr) + except (AttributeError, IndexError, KeyError, ValueError): + pass + finally: + if value is not None: + template[sub_key] = ( + str(value) if sub_key.endswith('units') else value + ) + else: + continue + elif archive_concept: + _, arch_attr = 
archive_concept.split('.', 1) + value = None + try: + value = walk_through_object(archive, arch_attr) + # Use multiple excepts to avoid catching all exceptions + except (AttributeError, IndexError, KeyError, ValueError): + pass + finally: + if value is not None: + template[key] = str(value) if key.endswith('units') else value + + template['/ENTRY[entry]/definition'] = 'NXxrd_pan' + + # Links to the data and concepts + template['/ENTRY[entry]/@default'] = 'experiment_result' + template['/ENTRY[entry]/experiment_result/@signal'] = 'intensity' + template['/ENTRY[entry]/experiment_result/@axes'] = 'two_theta' + template['/ENTRY[entry]/q_data/q'] = { + 'link': '/ENTRY[entry]/experiment_result/q_norm' + } + template['/ENTRY[entry]/q_data/intensity'] = { + 'link': '/ENTRY[entry]/experiment_result/intensity' + } + template['/ENTRY[entry]/q_data/q_parallel'] = { + 'link': '/ENTRY[entry]/experiment_result/q_parallel' + } + template['/ENTRY[entry]/q_data/q_perpendicular'] = { + 'link': '/ENTRY[entry]/experiment_result/q_perpendicular' + } + + +def write_nx_section_and_create_file( + archive: 'EntryArchive', logger: 'BoundLogger', scan_type: str = 'line' +): + """ + Uses the archive to generate the NeXus section and .nxs file. + + Args: + archive (EntryArchive): The archive containing the section. + logger (BoundLogger): A structlog logger. + generate_nexus_file (boolean): If True, the function will generate a .nxs file. + nxs_as_entry (boolean): If True, the function will generate a .nxs file + as a nomad entry. 
+ """ + nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan') + template = dataconverter.template.Template() + dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template) + connect_concepts(template, archive, scan_type=scan_type) + archive_name = archive.metadata.mainfile.split('.')[0] + nexus_output = f'{archive_name}.nxs' + + populate_nexus_subsection( + template=template, + app_def='NXxrd_pan', + archive=archive, + logger=logger, + output_file_path=nexus_output, + ) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 144a9ed7..2923a17e 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -68,20 +68,17 @@ NOMADMeasurementsCategory, ) from nomad_measurements.utils import get_bounding_range_2d, merge_sections +from nomad_measurements.xrd.nx import write_nx_section_and_create_file if TYPE_CHECKING: import pint from nomad.datamodel.datamodel import ( EntryArchive, ) - from pynxtools.dataconverter.template import Template from structlog.stdlib import ( BoundLogger, ) - import pint -from pynxtools.nomad.dataconverter import populate_nexus_subsection -from pynxtools import dataconverter from nomad.config import config @@ -862,6 +859,7 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): generate_nexus_file = Quantity( type=bool, description='Whether or not to generate a NeXus output file (if possible).', + default=True, a_eln=ELNAnnotation( component=ELNComponentEnum.BoolEditQuantity, label='Generate NeXus file', @@ -957,41 +955,6 @@ def write_xrd_data( ) merge_sections(self, xrd, logger) - def write_nx_section_and_create_file( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ): - """ - Uses the archive to generate the NeXus section and .nxs file. - - Args: - archive (EntryArchive): The archive containing the section. - logger (BoundLogger): A structlog logger. 
- """ - nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan') - template = dataconverter.template.Template() - dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template) - - template['/ENTRY[entry]/2theta_plot/intensity'] = archive.data.results[ - 0 - ].intensity.magnitude - template['/ENTRY[entry]/2theta_plot/two_theta'] = archive.data.results[ - 0 - ].two_theta.magnitude - template['/ENTRY[entry]/2theta_plot/two_theta/@units'] = str( - archive.data.results[0].two_theta.units - ) - archive_name = archive.metadata.mainfile.split('.')[0] - nexus_output = f'{archive_name}_output.nxs' - - populate_nexus_subsection( - template=template, - app_def='NXxrd_pan', - archive=archive, - logger=logger, - output_file_path=nexus_output, - on_temp_file=self.generate_nexus_file, - ) - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): """ The normalize function of the `ELNXRayDiffraction` section. @@ -1014,7 +977,11 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) if not self.results: return - self.write_nx_section_and_create_file(archive, logger) + + scan_type = xrd_dict.get('metadata', {}).get('scan_type', None) + if self.generate_nexus_file and self.data_file is not None: + write_nx_section_and_create_file(archive, logger, scan_type=scan_type) + self.figures = self.results[0].generate_plots(archive, logger) From b3b90f357d410af4c2a98a54708b0ab67c772d88 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 29 Aug 2024 15:28:16 +0200 Subject: [PATCH 14/41] Run Python test action for all PR --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index ece0b411..49d80603 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -7,7 +7,7 @@ on: push: branches: [ "main" ] pull_request: - branches: [ "main" ] + branches: [ "*" ] 
permissions: contents: read From 266f2516c65df4eb1575820b37bdf7b00383703e Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 3 Sep 2024 10:51:14 +0200 Subject: [PATCH 15/41] Fix for rebasing issues --- src/nomad_measurements/xrd/__init__.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py index 86e95635..a088a3d4 100644 --- a/src/nomad_measurements/xrd/__init__.py +++ b/src/nomad_measurements/xrd/__init__.py @@ -14,21 +14,6 @@ def load(self): ) -class XRDParserEntryPoint(ParserEntryPoint): - def load(self): - from nomad_measurements.xrd.parser import XRDParser - - return XRDParser(**self.dict()) - - -parser = XRDParserEntryPoint( - name='XRD Parser', - description='Parser for several kinds of raw files from XRD measurements.', - mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$', - mainfile_mime_re='text/.*|application/zip', -) - - class XRDParserEntryPoint(ParserEntryPoint): def load(self): from nomad_measurements.xrd.parser import XRDParser From f82d4a7810e872544f2356cb518b7632c0f15197 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 14 Jan 2025 10:58:38 +0100 Subject: [PATCH 16/41] Use hdf5 references for arrays (#118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * updated plugin structure * added pynxtools dependency * Apply suggestions from code review Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com> Co-authored-by: Hampus Näsström * Add sections for RSM and 1D which uses HDF5 references * Abstract out data interaction using setter and getter; allows to use same methods for classes with hdf5 refs * Use arrays, not references, in the `archive.results` section * Lock the state for using nexus file and corresponding references * Populate results without references * Make a general reader for raw files * Remove nexus flags * Add quantity for auxialiary file * Fix rebase * Make 
integration_time as hdf5reference * Reset results (refactor) * Add backward compatibility * Refactor reader * add missing imports * AttrDict class * Make concept map global * Add function to remove nexus annotations in concept map * Move try block inside walk_through_object * Fix imports * Add methods for generating hdf5 file * Rename auxiliary file * Expect aux file to be .nxs in the beginning * Add attributes for hdf5: data_dict, dataset_paths * Method for adding a quantity to hdf5_data_dict * Abstract out methods for creating files based on hdf5_data_dict * Add dataset_paths for nexus * Some reverting back * Minor fixes * Refactor populate_hdf5_data_dict: store a reference to be made later * Handle shift from nxs to hdf5 * Set hdf5 references after aux file is created * Cleaning * Fixing * Redefine result sections instead of extending * Remove plotly plots from ELN * Read util for hdf5 ref * Fixing * Move hdf5 handling into a util class * Refactor instance variables * Reset data dicts and reference after each writing * Fixing * Overwrite dataset if it already exists * Refactor add_dataset * Reorganize and docstrings * Rename variable * Add read_dataset method * Cleaning * Adapting schema with hdf5 handler * Comments, minor refactoring * Fixing; add `hdf5_handler` as an attribute for archive * Reorganization * Fixing * Refactoring * Cleaning * Try block for using hdf5 handler: don't fail early, as later normalization steps will have the handler!
* Extract units from dataset attrs when reading * Fixing * Linting * Make archive_path optional in add_dataset * Rename class * attrs for add_dataset; use it for units * Add add_attribute method * Refactor add_attribute * Add plot attributes: 1D * Refactor hdf5 states * Add back plotly figures * rename auxiliary file name if changed by handler * Add referenced plots * Allow hard link using internel reference * Add sections for plots * Comment out validation * Add archive paths for the plot subsections * Add back validation with flag * Use nexus flag * Add interpolated intensity data into h5 for qspace plots * Use prefix to reduce len of string * Store regularized linespace of q vectors; revise descriptions * Remove plotly plots * Bring plots to overview * Fix tests * Linting; remove attr arg from add_dataset * Review: move none check into method * Review: use 'with' for opening h5 file * Review: make internal states as private vars * Add pydantic basemodel for dataset * Use data from variables if available for reading * Review: remove lazy arg * Move DatasetModel outside Handler class * Remove None from get, as it is already a default * Merge if conditions --------- Co-authored-by: Andrea Albino Co-authored-by: Andrea Albino <95371554+aalbino2@users.noreply.github.com> Co-authored-by: Hampus Näsström --- src/nomad_measurements/utils.py | 331 ++++++++++++++ src/nomad_measurements/xrd/nx.py | 185 +------- src/nomad_measurements/xrd/schema.py | 632 ++++++++++++++++++++------- 3 files changed, 827 insertions(+), 321 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 250e030f..876d25b4 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -15,12 +15,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import collections import os.path +import re from typing import ( TYPE_CHECKING, + Any, + Optional, ) +import h5py import numpy as np +import pint +from nomad.datamodel.hdf5 import HDF5Reference +from nomad.units import ureg +from pydantic import BaseModel, Field if TYPE_CHECKING: from nomad.datamodel.data import ( @@ -166,3 +175,325 @@ def get_bounding_range_2d(ax1, ax2): ] return ax1_range, ax2_range + + +class DatasetModel(BaseModel): + """ + Pydantic model for the dataset to be stored in the HDF5 file. + """ + + data: Any = Field(description='The data to be stored in the HDF5 file.') + archive_path: Optional[str] = Field( + None, description='The path of the quantity in the NOMAD archive.' + ) + internal_reference: Optional[bool] = Field( + False, + description='If True, an internal reference is set to an existing HDF5 ' + 'dataset.', + ) + + +class HDF5Handler: + """ + Class for handling the creation of auxiliary files to store big data arrays outside + the main archive file (e.g. HDF5, NeXus). + """ + + def __init__( + self, + filename: str, + archive: 'EntryArchive', + logger: 'BoundLogger', + valid_dataset_paths: list = None, + nexus: bool = False, + ): + """ + Initialize the handler. + + Args: + filename (str): The name of the auxiliary file. + archive (EntryArchive): The NOMAD archive. + logger (BoundLogger): A structlog logger. + valid_dataset_paths (list): The list of valid dataset paths. + nexus (bool): If True, the file is created as a NeXus file. 
+ """ + if not filename.endswith(('.nxs', '.h5')): + raise ValueError('Only .h5 or .nxs files are supported.') + + self.data_file = filename + self.archive = archive + self.logger = logger + self.valid_dataset_paths = [] + if valid_dataset_paths: + self.valid_dataset_paths = valid_dataset_paths + self.nexus = nexus + + self._hdf5_datasets = collections.OrderedDict() + self._hdf5_attributes = collections.OrderedDict() + + def add_dataset( + self, + path: str, + params: dict, + validate_path: bool = True, + ): + """ + Add a dataset to the HDF5 file. The dataset is written lazily to the file + when `write_file` method is called. The `path` is validated against the + `valid_dataset_paths` if provided before adding the data. + + `params` should be a dictionary containing `data`. Optionally, + it can also contain `archive_path` and `internal_reference`: + { + 'data': Any, + 'archive_path': str, + 'internal_reference': bool, + } + + Args: + path (str): The dataset path to be used in the HDF5 file. + params (dict): The dataset parameters. + validate_path (bool): If True, the dataset path is validated. + """ + if not params: + self.logger.warning('Dataset `params` must be provided.') + return + + dataset = DatasetModel( + **params, + ) + if ( + validate_path + and self.valid_dataset_paths + and path not in self.valid_dataset_paths + ): + self.logger.warning(f'Invalid dataset path "{path}".') + return + + # handle the pint.Quantity and add data + if isinstance(dataset.data, pint.Quantity): + self.add_attribute( + path=path, + params=dict( + units=str(dataset.data.units), + ), + ) + dataset.data = dataset.data.magnitude + + self._hdf5_datasets[path] = dataset + + def add_attribute( + self, + path: str, + params: dict, + ): + """ + Add an attribute to the dataset or group at the given path. The attribute is + written lazily to the file when `write_file` method is called. + + Args: + path (str): The dataset or group path in the HDF5 file. 
+ params (dict): The attributes to be added. + """ + if not params: + self.logger.warning('Attribute `params` must be provided.') + return + self._hdf5_attributes[path] = params + + def read_dataset(self, path: str): + """ + Returns the dataset at the given path. If the quantity has `units` as an + attribute, tries to returns a `pint.Quantity`. + If the dataset available in the `self._hdf5_datasets`, it is returned directly. + + Args: + path (str): The dataset path in the HDF5 file. + """ + if path is None: + return + file_path, dataset_path = path.split('#') + + # find path in the instance variables + value = None + if dataset_path in self._hdf5_datasets: + value = self._hdf5_datasets[dataset_path].data + if dataset_path in self._hdf5_attributes: + units = self._hdf5_attributes[dataset_path].get('units') + if units: + value *= ureg(units) + return value + + file_name = file_path.rsplit('/raw/', 1)[1] + with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5: + if dataset_path not in h5: + self.logger.warning(f'Dataset "{dataset_path}" not found.') + else: + value = h5[dataset_path][...] + try: + units = h5[dataset_path].attrs['units'] + value *= ureg(units) + except KeyError: + pass + return value + + def write_file(self): + """ + Method for creating an auxiliary file to store big data arrays outside the + main archive file (e.g. HDF5, NeXus). + """ + if self.nexus: + try: + self._write_nx_file() + except Exception as e: + self.nexus = False + self.logger.warning( + f'Encountered "{e}" error while creating nexus file. ' + 'Creating h5 file instead.' + ) + self._write_hdf5_file() + else: + self._write_hdf5_file() + + def _write_nx_file(self): + """ + Method for creating a NeXus file. Additional data from the archive is added + to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus + view of the data in addition to storing array data. 
+ """ + if self.data_file.endswith('.h5'): + self.data_file = self.data_file.replace('.h5', '.nxs') + raise NotImplementedError('Method `write_nx_file` is not implemented.') + # TODO add archive data to `hdf5_data_dict` before creating the nexus file. Use + # `populate_hdf5_data_dict` method for each quantity that is needed in .nxs + # file. Create a NeXus file with the data in `hdf5_data_dict`. + # One issue here is as we populate the `hdf5_data_dict` with the archive data, + # we will always have to over write the nexus file + + def _write_hdf5_file(self): # noqa: PLR0912 + """ + Method for creating an HDF5 file. + """ + if self.data_file.endswith('.nxs'): + self.data_file = self.data_file.replace('.nxs', '.h5') + if not self._hdf5_datasets and not self._hdf5_attributes: + return + # remove the nexus annotations from the dataset paths if any + tmp_dict = {} + for key, value in self._hdf5_datasets.items(): + new_key = self._remove_nexus_annotations(key) + tmp_dict[new_key] = value + self._hdf5_datasets = tmp_dict + tmp_dict = {} + for key, value in self._hdf5_attributes.items(): + tmp_dict[self._remove_nexus_annotations(key)] = value + self._hdf5_attributes = tmp_dict + + # create the HDF5 file + mode = 'r+b' if self.archive.m_context.raw_path_exists(self.data_file) else 'wb' + with h5py.File( + self.archive.m_context.raw_file(self.data_file, mode), 'a' + ) as h5: + for key, value in self._hdf5_datasets.items(): + if value.data is None: + self.logger.warning(f'No data found for "{key}". Skipping.') + continue + elif value.internal_reference: + # resolve the internal reference + try: + data = h5[self._remove_nexus_annotations(value.data)] + except KeyError: + self.logger.warning( + f'Internal reference "{value.data}" not found. Skipping.' + ) + continue + else: + data = value.data + + group_name, dataset_name = key.rsplit('/', 1) + group = h5.require_group(group_name) + + if key in h5: + group[dataset_name][...] 
= data + else: + group.create_dataset( + name=dataset_name, + data=data, + ) + self._set_hdf5_reference( + self.archive, + value.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{key}', + ) + for key, value in self._hdf5_attributes.items(): + if key in h5: + h5[key].attrs.update(value) + else: + self.logger.warning(f'Path "{key}" not found to add attribute.') + + # reset hdf5 datasets and atttributes + self._hdf5_datasets = collections.OrderedDict() + self._hdf5_attributes = collections.OrderedDict() + + @staticmethod + def _remove_nexus_annotations(path: str) -> str: + """ + Remove the nexus related annotations from the dataset path. + For e.g., + '/ENTRY[entry]/experiment_result/intensity' -> + '/entry/experiment_result/intensity' + + Args: + path (str): The dataset path with nexus annotations. + + Returns: + str: The dataset path without nexus annotations. + """ + if not path: + return path + + pattern = r'.*\[.*\]' + new_path = '' + for part in path.split('/')[1:]: + if re.match(pattern, part): + new_path += '/' + part.split('[')[0].strip().lower() + else: + new_path += '/' + part + new_path = new_path.replace('.nxs', '.h5') + return new_path + + @staticmethod + def _set_hdf5_reference( + section: 'ArchiveSection' = None, path: str = None, ref: str = None + ): + """ + Method for setting a HDF5Reference quantity in a section. It can handle + nested quantities and repeatable sections, provided that the quantity itself + is of type `HDF5Reference`. + For example, one can set the reference for a quantity path like + `data.results[0].intensity`. + + Args: + section (Section): The NOMAD section containing the quantity. + path (str): The path to the quantity. + ref (str): The reference to the HDF5 dataset. 
+ """ + # TODO handle the case when section in the path is not initialized + + if not section or not path or not ref: + return + attr = section + path = path.split('.') + quantity_name = path.pop() + + for subpath in path: + if re.match(r'.*\[.*\]', subpath): + index = int(subpath.split('[')[1].split(']')[0]) + attr = attr.m_get(subpath.split('[')[0], index=index) + else: + attr = attr.m_get(subpath) + + if isinstance( + attr.m_get_quantity_definition(quantity_name).type, HDF5Reference + ): + attr.m_set(quantity_name, ref) diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py index e1b41fcf..db73af2f 100644 --- a/src/nomad_measurements/xrd/nx.py +++ b/src/nomad_measurements/xrd/nx.py @@ -15,168 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import TYPE_CHECKING -from pynxtools import dataconverter -from pynxtools.nomad.dataconverter import populate_nexus_subsection - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive - from structlog.stdlib import ( - BoundLogger, - ) - - -def walk_through_object(parent_obj, attr_chain, default=None): - """ - Walk though the object until reach the leaf. - - Args: - parent_obj: This is a python obj. - attr_chain: Dot separated obj chain. - default: A value to be returned by default, if not data is found. 
- """ - expected_parts = 2 - if isinstance(attr_chain, str): - parts = attr_chain.split('.', 1) - - if len(parts) == expected_parts: - child_nm, rest_part = parts - if '[' in child_nm: - child_nm, index = child_nm.split('[') - index = int(index[:-1]) - child_obj = getattr(parent_obj, child_nm)[index] - else: - child_obj = getattr(parent_obj, child_nm) - return walk_through_object(child_obj, rest_part, default=default) - else: - return getattr(parent_obj, attr_chain, default) - - -def connect_concepts(template, archive: 'EntryArchive', scan_type: str): # noqa: PLR0912 - """ - Connect the concepts between `ELNXrayDiffraction` and `NXxrd_pan` schema. - - Args: - template (Template): The pynxtools template, a inherited class from python dict. - archive (EntryArchive): Nomad archive contains secttions, subsections and - quantities. - scan_type (str): Name of the scan type such as line and RSM. - """ - - # General concepts - # ruff: noqa: E501 - concept_map = { - '/ENTRY[entry]/method': 'archive.data.method', - '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', - '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity.magnitude', - '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta.magnitude', - '/ENTRY[entry]/experiment_result/two_theta/@units': 'archive.data.results[0].two_theta.units', - '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega.magnitude', - '/ENTRY[entry]/experiment_result/omega/@units': 'archive.data.results[0].omega.units', - '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi.magnitude', - '/ENTRY[entry]/experiment_result/chi/@units': 'archive.data.results[0].chi.units', - '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi.magnitude', - '/ENTRY[entry]/experiment_result/phi/@units': 'archive.data.results[0].phi.units', - '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis', - 
'/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time.magnitude', - 'line': '', # For future implementation - 'rsm': { - '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel', - '/ENTRY[entry]/experiment_result/q_parallel/@units': 'archive.data.results[0].q_parallel.units', - '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular.magnitude', - '/ENTRY[entry]/experiment_result/q_perpendicular/@units': 'archive.data.results[0].q_perpendicular.units', - '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm.magnitude', - '/ENTRY[entry]/experiment_result/q_norm/@units': 'archive.data.results[0].q_norm.units', - }, - # Source - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current.magnitude', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current/@units': 'archive.data.xrd_settings.source.xray_tube_current.units', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage.magnitude', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage/@units': 'archive.data.xrd_settings.source.xray_tube_voltage.units', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one.magnitude', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one/@units': 'archive.data.xrd_settings.source.kalpha_one.units', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two.magnitude', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two/@units': 'archive.data.xrd_settings.source.kalpha_two.units', - 
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta.magnitude', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta/@units': 'archive.data.xrd_settings.source.kbeta.units', - } - - for key, archive_concept in concept_map.items(): - if isinstance(archive_concept, dict): - if key == scan_type: - for sub_key, sub_archive_concept in archive_concept.items(): - _, arch_attr = sub_archive_concept.split('.', 1) - value = None - try: - value = walk_through_object(archive, arch_attr) - except (AttributeError, IndexError, KeyError, ValueError): - pass - finally: - if value is not None: - template[sub_key] = ( - str(value) if sub_key.endswith('units') else value - ) - else: - continue - elif archive_concept: - _, arch_attr = archive_concept.split('.', 1) - value = None - try: - value = walk_through_object(archive, arch_attr) - # Use multiple excepts to avoid catching all exceptions - except (AttributeError, IndexError, KeyError, ValueError): - pass - finally: - if value is not None: - template[key] = str(value) if key.endswith('units') else value - - template['/ENTRY[entry]/definition'] = 'NXxrd_pan' - - # Links to the data and concepts - template['/ENTRY[entry]/@default'] = 'experiment_result' - template['/ENTRY[entry]/experiment_result/@signal'] = 'intensity' - template['/ENTRY[entry]/experiment_result/@axes'] = 'two_theta' - template['/ENTRY[entry]/q_data/q'] = { - 'link': '/ENTRY[entry]/experiment_result/q_norm' - } - template['/ENTRY[entry]/q_data/intensity'] = { - 'link': '/ENTRY[entry]/experiment_result/intensity' - } - template['/ENTRY[entry]/q_data/q_parallel'] = { - 'link': '/ENTRY[entry]/experiment_result/q_parallel' - } - template['/ENTRY[entry]/q_data/q_perpendicular'] = { - 'link': '/ENTRY[entry]/experiment_result/q_perpendicular' - } - - -def 
write_nx_section_and_create_file( - archive: 'EntryArchive', logger: 'BoundLogger', scan_type: str = 'line' -): - """ - Uses the archive to generate the NeXus section and .nxs file. - - Args: - archive (EntryArchive): The archive containing the section. - logger (BoundLogger): A structlog logger. - generate_nexus_file (boolean): If True, the function will generate a .nxs file. - nxs_as_entry (boolean): If True, the function will generate a .nxs file - as a nomad entry. - """ - nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan') - template = dataconverter.template.Template() - dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template) - connect_concepts(template, archive, scan_type=scan_type) - archive_name = archive.metadata.mainfile.split('.')[0] - nexus_output = f'{archive_name}.nxs' - - populate_nexus_subsection( - template=template, - app_def='NXxrd_pan', - archive=archive, - logger=logger, - output_file_path=nexus_output, - ) +NEXUS_DATASET_PATHS = [ + '/ENTRY[entry]/experiment_result/intensity', + '/ENTRY[entry]/experiment_result/two_theta', + '/ENTRY[entry]/experiment_result/omega', + '/ENTRY[entry]/experiment_result/chi', + '/ENTRY[entry]/experiment_result/phi', + '/ENTRY[entry]/experiment_config/count_time', + '/ENTRY[entry]/experiment_result/q_norm', + '/ENTRY[entry]/experiment_result/q_parallel', + '/ENTRY[entry]/experiment_result/q_perpendicular', + '/ENTRY[entry]/method', + '/ENTRY[entry]/measurement_type', + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', + 
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', +] diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 2923a17e..80ec1dc3 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -22,19 +22,25 @@ ) import numpy as np +import pint import plotly.express as px from fairmat_readers_xrd import ( read_bruker_brml, read_panalytical_xrdml, read_rigaku_rasx, ) +from nomad.config import config from nomad.datamodel.data import ( ArchiveSection, EntryData, ) +from nomad.datamodel.hdf5 import ( + HDF5Reference, +) from nomad.datamodel.metainfo.annotations import ( ELNAnnotation, ELNComponentEnum, + H5WebAnnotation, ) from nomad.datamodel.metainfo.basesections import ( CompositeSystemReference, @@ -42,10 +48,7 @@ MeasurementResult, ReadableIdentifiers, ) -from nomad.datamodel.metainfo.plot import ( - PlotlyFigure, - PlotSection, -) +from nomad.datamodel.metainfo.plot import PlotlyFigure from nomad.datamodel.results import ( DiffractionPattern, MeasurementMethod, @@ -67,11 +70,14 @@ from nomad_measurements.general import ( NOMADMeasurementsCategory, ) -from nomad_measurements.utils import get_bounding_range_2d, merge_sections -from nomad_measurements.xrd.nx import write_nx_section_and_create_file +from nomad_measurements.utils import ( + HDF5Handler, + get_bounding_range_2d, + merge_sections, +) +from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS if TYPE_CHECKING: - import pint from nomad.datamodel.datamodel import ( EntryArchive, ) @@ -80,18 +86,16 @@ ) -from nomad.config import config - configuration = config.get_plugin_entry_point('nomad_measurements.xrd:schema') m_package = SchemaPackage(aliases=['nomad_measurements.xrd.parser.parser']) def calculate_two_theta_or_q( - wavelength: 'pint.Quantity', - q: 'pint.Quantity' = None, - two_theta: 'pint.Quantity' = None, -) -> tuple['pint.Quantity', 'pint.Quantity']: + wavelength: pint.Quantity, + q: pint.Quantity = None, + two_theta: 
pint.Quantity = None, +) -> tuple[pint.Quantity, pint.Quantity]: """ Calculate the two-theta array from the scattering vector (q) or vice-versa, given the wavelength of the X-ray source. @@ -113,10 +117,10 @@ def calculate_two_theta_or_q( return q, two_theta -def calculate_q_vectors_RSM( - wavelength: 'pint.Quantity', - two_theta: 'pint.Quantity', - omega: 'pint.Quantity', +def calculate_q_vectors_rsm( + wavelength: pint.Quantity, + two_theta: pint.Quantity, + omega: pint.Quantity, ): """ Calculate the q-vectors for RSM scans in coplanar configuration. @@ -265,6 +269,205 @@ class XRDSettings(ArchiveSection): source = SubSection(section_def=XRayTubeSource) +class XRDResultPlotIntensity(ArchiveSection): + m_def = Section( + a_h5web=H5WebAnnotation( + axes=['two_theta', 'omega', 'phi', 'chi'], signal='intensity' + ) + ) + intensity = Quantity( + type=HDF5Reference, + description='The count at each 2-theta value, dimensionless', + ) + two_theta = Quantity( + type=HDF5Reference, + description='The 2-theta range of the diffractogram', + ) + omega = Quantity( + type=HDF5Reference, + description='The omega range of the diffractogram', + ) + + def normalize(self, archive, logger): + super().normalize(archive, logger) + prefix = '/ENTRY[entry]/experiment_result' + try: + hdf5_handler = self.m_parent.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + if self.intensity is None or self.two_theta is None: + return + + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/two_theta', + params=dict( + data=f'{prefix}/two_theta', + archive_path='data.results[0].plot_intensity.two_theta', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/intensity', + params=dict( + data=f'{prefix}/intensity', + archive_path='data.results[0].plot_intensity.intensity', + internal_reference=True, + ), + validate_path=False, + ) + 
hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity', + params=dict( + axes='two_theta', + signal='intensity', + NX_class='NXdata', + ), + ) + for var_axis in ['omega', 'phi', 'chi']: + if self.get(var_axis) is not None: + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/{var_axis}', + params=dict( + data=f'{prefix}/{var_axis}', + archive_path=f'data.results[0].plot_intensity.{var_axis}', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity', + params=dict( + axes=[var_axis, 'two_theta'], + signal='intensity', + NX_class='NXdata', + ), + ) + break + + hdf5_handler.write_file() + + +class XRDResultPlotIntensityScatteringVector(ArchiveSection): + m_def = Section( + a_h5web=H5WebAnnotation( + axes=['q_parallel', 'q_perpendicular', 'q_norm'], signal='intensity' + ) + ) + intensity = Quantity( + type=HDF5Reference, + description=""" + The count at each q value. In case of RSM, it contains interpolated values of + `intensity` at regularized grid of `q` vectors. 
+ """, + ) + q_norm = Quantity( + type=HDF5Reference, + description='The q range of the diffractogram', + ) + q_parallel = Quantity( + type=HDF5Reference, + description='The regularized grid of `q_parallel` range for plotting.', + ) + q_perpendicular = Quantity( + type=HDF5Reference, + description='The regularized grid of `q_perpendicular` range for plotting.', + ) + + def normalize(self, archive, logger): + super().normalize(archive, logger) + prefix = '/ENTRY[entry]/experiment_result' + try: + hdf5_handler = self.m_parent.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + if self.intensity is None: + return + + if self.q_norm is not None: + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/intensity', + params=dict( + data=f'{prefix}/intensity', + archive_path='data.results[0].plot_intensity_scattering_vector.intensity', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_norm', + params=dict( + data=f'{prefix}/q_norm', + archive_path='data.results[0].plot_intensity_scattering_vector.q_norm', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity_scattering_vector', + params=dict( + axes='q_norm', + signal='intensity', + NX_class='NXdata', + ), + ) + elif self.q_parallel is not None and self.q_perpendicular is not None: + intensity = hdf5_handler.read_dataset(self.intensity) + q_parallel = hdf5_handler.read_dataset(self.q_parallel) + q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + # q_vectors lead to irregular grid + # generate a regular grid using interpolation + x = q_parallel.to('1/angstrom').magnitude.flatten() + y = q_perpendicular.to('1/angstrom').magnitude.flatten() + x_regular = np.linspace(x.min(), x.max(), intensity.shape[0]) + y_regular = np.linspace(y.min(), y.max(), 
intensity.shape[1]) + x_grid, y_grid = np.meshgrid(x_regular, y_regular) + z_interpolated = griddata( + points=(x, y), + values=intensity.flatten(), + xi=(x_grid, y_grid), + method='linear', + fill_value=intensity.min(), + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_parallel', + params=dict( + data=x_regular, + archive_path='data.results[0].plot_intensity_scattering_vector.q_parallel', + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_perpendicular', + params=dict( + data=y_regular, + archive_path='data.results[0].plot_intensity_scattering_vector.q_perpendicular', + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/intensity', + params=dict( + data=z_interpolated, + archive_path='data.results[0].plot_intensity_scattering_vector.intensity', + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity_scattering_vector', + params=dict( + axes=['q_perpendicular', 'q_parallel'], + signal='intensity', + NX_class='NXdata', + ), + ) + hdf5_handler.write_file() + + class XRDResult(MeasurementResult): """ Section containing the result of an X-ray diffraction scan. @@ -272,52 +475,28 @@ class XRDResult(MeasurementResult): m_def = Section() - array_index = Quantity( - type=np.dtype(np.float64), - shape=['*'], - description=( - 'A placeholder for the indices of vectorial quantities. ' - 'Used as x-axis for plots within quantities.' 
- ), - a_display={'visible': False}, - ) intensity = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='dimensionless', + type=HDF5Reference, description='The count at each 2-theta value, dimensionless', - a_plot={'x': 'array_index', 'y': 'intensity'}, ) two_theta = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The 2-theta range of the diffractogram', - a_plot={'x': 'array_index', 'y': 'two_theta'}, ) q_norm = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='meter**(-1)', + type=HDF5Reference, description='The norm of scattering vector *Q* of the diffractogram', - a_plot={'x': 'array_index', 'y': 'q_norm'}, ) omega = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The omega range of the diffractogram', ) phi = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The phi range of the diffractogram', ) chi = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The chi range of the diffractogram', ) source_peak_wavelength = Quantity( @@ -331,11 +510,13 @@ class XRDResult(MeasurementResult): description='Axis scanned', ) integration_time = Quantity( - type=np.dtype(np.float64), - unit='s', - shape=['*'], + type=HDF5Reference, description='Integration time per channel', ) + plot_intensity = SubSection(section_def=XRDResultPlotIntensity) + plot_intensity_scattering_vector = SubSection( + section_def=XRDResultPlotIntensityScatteringVector + ) class XRDResult1D(XRDResult): @@ -343,9 +524,7 @@ class XRDResult1D(XRDResult): Section containing the result of a 1D X-ray diffraction scan. """ - m_def = Section() - - def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): + def generate_plots(self): """ Plot the 1D diffractogram. 
@@ -358,12 +537,20 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): (dict, dict): line_linear, line_log """ plots = [] - if self.two_theta is None or self.intensity is None: + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): return plots - x = self.two_theta.to('degree').magnitude - y = self.intensity.magnitude + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + if two_theta is None or intensity is None: + return plots + x = two_theta.to('degree').magnitude + y = intensity.magnitude fig_line_linear = px.line( x=x, y=y, @@ -449,10 +636,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) ) - if self.q_norm is None: + q_norm = hdf5_handler.read_dataset(self.q_norm) + if q_norm is None: return plots - x = self.q_norm.to('1/angstrom').magnitude + x = q_norm.to('1/angstrom').magnitude fig_line_log = px.line( x=x, y=y, @@ -515,12 +703,45 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): self.name = f'{self.scan_axis} Scan Result' else: self.name = 'XRD Scan Result' + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + if self.source_peak_wavelength is not None: - self.q_norm, self.two_theta = calculate_two_theta_or_q( + q_norm = hdf5_handler.read_dataset(self.q_norm) + two_theta = hdf5_handler.read_dataset(self.two_theta) + q_norm, two_theta = calculate_two_theta_or_q( wavelength=self.source_peak_wavelength, - two_theta=self.two_theta, - q=self.q_norm, + two_theta=two_theta, + q=q_norm, + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_norm', + params=dict( + data=q_norm, + archive_path='data.results[0].q_norm', + ), + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/two_theta', + params=dict( + 
data=two_theta, + archive_path='data.results[0].two_theta', + ), ) + hdf5_handler.write_file() + self.m_setdefault('plot_intensity_scattering_vector') + self.plot_intensity_scattering_vector.intensity = self.intensity + self.plot_intensity_scattering_vector.q_norm = self.q_norm + self.plot_intensity_scattering_vector.normalize(archive, logger) + + self.m_setdefault('plot_intensity') + self.plot_intensity.intensity = self.intensity + self.plot_intensity.two_theta = self.two_theta + self.plot_intensity.normalize(archive, logger) class XRDResultRSM(XRDResult): @@ -528,27 +749,16 @@ class XRDResultRSM(XRDResult): Section containing the result of a Reciprocal Space Map (RSM) scan. """ - m_def = Section() q_parallel = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='meter**(-1)', + type=HDF5Reference, description='The scattering vector *Q_parallel* of the diffractogram', ) q_perpendicular = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='meter**(-1)', + type=HDF5Reference, description='The scattering vector *Q_perpendicular* of the diffractogram', ) - intensity = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='dimensionless', - description='The count at each position, dimensionless', - ) - def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): + def generate_plots(self): """ Plot the 2D RSM diffractogram. 
@@ -561,14 +771,24 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): (dict, dict): json_2theta_omega, json_q_vector """ plots = [] - if self.two_theta is None or self.intensity is None or self.omega is None: + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return plots + + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + omega = hdf5_handler.read_dataset(self.omega) + if two_theta is None or intensity is None or omega is None: return plots # Plot for 2theta-omega RSM # Zero values in intensity become -inf in log scale and are not plotted - x = self.omega.to('degree').magnitude - y = self.two_theta.to('degree').magnitude - z = self.intensity.magnitude + x = omega.to('degree').magnitude + y = two_theta.to('degree').magnitude + z = intensity.magnitude log_z = np.log10(z) x_range, y_range = get_bounding_range_2d(x, y) @@ -636,9 +856,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) # Plot for RSM in Q-vectors - if self.q_parallel is not None and self.q_perpendicular is not None: - x = self.q_parallel.to('1/angstrom').magnitude.flatten() - y = self.q_perpendicular.to('1/angstrom').magnitude.flatten() + q_parallel = hdf5_handler.read_dataset(self.q_parallel) + q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + if q_parallel is not None and q_perpendicular is not None: + x = q_parallel.to('1/angstrom').magnitude.flatten() + y = q_perpendicular.to('1/angstrom').magnitude.flatten() # q_vectors lead to irregular grid # generate a regular grid using interpolation x_regular = np.linspace(x.min(), x.max(), z.shape[0]) @@ -721,21 +943,58 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) + if self.name is None: self.name 
= 'RSM Scan Result' - var_axis = 'omega' - if self.source_peak_wavelength is not None: - for var_axis in ['omega', 'chi', 'phi']: - if ( - self[var_axis] is not None - and len(np.unique(self[var_axis].magnitude)) > 1 - ): - self.q_parallel, self.q_perpendicular = calculate_q_vectors_RSM( - wavelength=self.source_peak_wavelength, - two_theta=self.two_theta * np.ones_like(self.intensity), - omega=self[var_axis], - ) - break + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + var_axis = None + for axis in ['omega', 'chi', 'phi']: + axis_value = hdf5_handler.read_dataset(getattr(self, axis)) + if axis_value is not None and len(np.unique(axis_value.magnitude)) > 1: + var_axis = axis + break + + if self.source_peak_wavelength is not None and var_axis is not None: + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + q_parallel, q_perpendicular = calculate_q_vectors_rsm( + wavelength=self.source_peak_wavelength, + two_theta=two_theta * np.ones_like(intensity), + omega=hdf5_handler.read_dataset(getattr(self, var_axis)), + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_parallel', + params=dict( + data=q_parallel, + archive_path='data.results[0].q_parallel', + ), + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_perpendicular', + params=dict( + data=q_perpendicular, + archive_path='data.results[0].q_perpendicular', + ), + ) + hdf5_handler.write_file() + self.m_setdefault('plot_intensity_scattering_vector') + self.plot_intensity_scattering_vector.intensity = self.intensity + self.plot_intensity_scattering_vector.q_parallel = self.q_parallel + self.plot_intensity_scattering_vector.q_perpendicular = self.q_perpendicular + self.plot_intensity_scattering_vector.normalize(archive, logger) + + if var_axis is not None: + self.m_setdefault('plot_intensity') + 
self.plot_intensity.intensity = self.intensity + self.plot_intensity.two_theta = self.two_theta + self.plot_intensity.m_set(var_axis, getattr(self, var_axis)) + self.plot_intensity.normalize(archive, logger) class XRayDiffraction(Measurement): @@ -802,31 +1061,39 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): archive.results = Results() if not archive.results.properties: archive.results.properties = Properties() + if not archive.results.method: + archive.results.method = Method( + method_name='XRD', + measurement=MeasurementMethod( + xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name) + ), + ) + + try: + hdf5_handler = self.hdf5_handler + except AttributeError: + return if not archive.results.properties.structural: diffraction_patterns = [] for result in self.results: - if len(result.intensity.shape) == 1: + intensity = hdf5_handler.read_dataset(result.intensity) + if len(intensity.shape) == 1: + two_theta = hdf5_handler.read_dataset(result.two_theta) + q_norm = hdf5_handler.read_dataset(result.q_norm) diffraction_patterns.append( DiffractionPattern( incident_beam_wavelength=result.source_peak_wavelength, - two_theta_angles=result.two_theta, - intensity=result.intensity, - q_vector=result.q_norm, + two_theta_angles=two_theta, + intensity=intensity, + q_vector=q_norm, ) ) archive.results.properties.structural = StructuralProperties( diffraction_pattern=diffraction_patterns ) - if not archive.results.method: - archive.results.method = Method( - method_name='XRD', - measurement=MeasurementMethod( - xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name) - ), - ) -class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): +class ELNXRayDiffraction(XRayDiffraction, EntryData): """ Example section for how XRayDiffraction can be implemented with a general reader for common XRD file types. 
@@ -841,6 +1108,12 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): a_template={ 'measurement_identifiers': {}, }, + a_h5web=H5WebAnnotation( + paths=[ + 'results/0/plot_intensity', + 'results/0/plot_intensity_scattering_vector', + ] + ), ) data_file = Quantity( type=str, @@ -849,6 +1122,14 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): component=ELNComponentEnum.FileEditQuantity, ), ) + auxiliary_file = Quantity( + type=str, + description='Auxiliary file (like .h5 or .nxs) containing the entry data.', + a_eln=ELNAnnotation( + component=ELNComponentEnum.FileEditQuantity, + ), + ) + hdf5_handler = None measurement_identifiers = SubSection( section_def=ReadableIdentifiers, ) @@ -856,21 +1137,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): diffraction_method_name.m_annotations['eln'] = ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ) - generate_nexus_file = Quantity( - type=bool, - description='Whether or not to generate a NeXus output file (if possible).', - default=True, - a_eln=ELNAnnotation( - component=ELNComponentEnum.BoolEditQuantity, - label='Generate NeXus file', - ), - ) def get_read_write_functions(self) -> tuple[Callable, Callable]: """ Method for getting the correct read and write functions for the current data file. - Returns: tuple[Callable, Callable]: The read, write functions. 
""" @@ -899,49 +1170,81 @@ def write_xrd_data( metadata_dict: dict = xrd_dict.get('metadata', {}) source_dict: dict = metadata_dict.get('source', {}) - scan_type = metadata_dict.get('scan_type', None) - if scan_type == 'line': - result = XRDResult1D( - intensity=xrd_dict.get('intensity', None), - two_theta=xrd_dict.get('2Theta', None), - omega=xrd_dict.get('Omega', None), - chi=xrd_dict.get('Chi', None), - phi=xrd_dict.get('Phi', None), - scan_axis=metadata_dict.get('scan_axis', None), - integration_time=xrd_dict.get('countTime', None), - ) - result.normalize(archive, logger) + scan_type = metadata_dict.get('scan_type') + if scan_type not in ['line', 'rsm']: + logger.error(f'Scan type `{scan_type}` is not supported.') + return + # Create a new result section + results = [] + result = None + if scan_type == 'line': + result = XRDResult1D() elif scan_type == 'rsm': - result = XRDResultRSM( - intensity=xrd_dict.get('intensity', None), - two_theta=xrd_dict.get('2Theta', None), - omega=xrd_dict.get('Omega', None), - chi=xrd_dict.get('Chi', None), - phi=xrd_dict.get('Phi', None), - scan_axis=metadata_dict.get('scan_axis', None), - integration_time=xrd_dict.get('countTime', None), + result = XRDResultRSM() + + if result is not None: + result.scan_axis = metadata_dict.get('scan_axis') + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/intensity', + params=dict( + data=xrd_dict.get('intensity'), + archive_path='data.results[0].intensity', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/two_theta', + params=dict( + data=xrd_dict.get('2Theta'), + archive_path='data.results[0].two_theta', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/omega', + params=dict( + data=xrd_dict.get('Omega'), + archive_path='data.results[0].omega', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/chi', + params=dict( + data=xrd_dict.get('Chi'), + 
archive_path='data.results[0].chi', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/phi', + params=dict( + data=xrd_dict.get('Phi'), + archive_path='data.results[0].phi', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_config/count_time', + params=dict( + data=xrd_dict.get('countTime'), + archive_path='data.results[0].integration_time', + ), ) result.normalize(archive, logger) - else: - raise NotImplementedError(f'Scan type `{scan_type}` is not supported.') + results.append(result) source = XRayTubeSource( - xray_tube_material=source_dict.get('anode_material', None), - kalpha_one=source_dict.get('kAlpha1', None), - kalpha_two=source_dict.get('kAlpha2', None), - ratio_kalphatwo_kalphaone=source_dict.get('ratioKAlpha2KAlpha1', None), - kbeta=source_dict.get('kBeta', None), - xray_tube_voltage=source_dict.get('voltage', None), - xray_tube_current=source_dict.get('current', None), + xray_tube_material=source_dict.get('anode_material'), + kalpha_one=source_dict.get('kAlpha1'), + kalpha_two=source_dict.get('kAlpha2'), + ratio_kalphatwo_kalphaone=source_dict.get('ratioKAlpha2KAlpha1'), + kbeta=source_dict.get('kBeta'), + xray_tube_voltage=source_dict.get('voltage'), + xray_tube_current=source_dict.get('current'), ) source.normalize(archive, logger) - xrd_settings = XRDSettings(source=source) xrd_settings.normalize(archive, logger) samples = [] - if metadata_dict.get('sample_id', None) is not None: + if metadata_dict.get('sample_id') is not None: sample = CompositeSystemReference( lab_id=metadata_dict['sample_id'], ) @@ -949,12 +1252,23 @@ def write_xrd_data( samples.append(sample) xrd = ELNXRayDiffraction( - results=[result], + results=results, xrd_settings=xrd_settings, samples=samples, ) + merge_sections(self, xrd, logger) + def backward_compatibility(self): + """ + Method for backward compatibility. 
+ """ + # Migration to using HFD5References: removing exisiting results + if self.get('results'): + self.results = [] + if self.get('figures'): + self.figures = [] + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): """ The normalize function of the `ELNXRayDiffraction` section. @@ -964,7 +1278,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): normalized. logger (BoundLogger): A structlog logger. """ + self.backward_compatibility() if self.data_file is not None: + self.auxiliary_file = f'{self.data_file}.nxs' + self.hdf5_handler = HDF5Handler( + filename=self.auxiliary_file, + archive=archive, + logger=logger, + valid_dataset_paths=NEXUS_DATASET_PATHS, + nexus=True, + ) read_function, write_function = self.get_read_write_functions() if read_function is None or write_function is None: logger.warn( @@ -974,15 +1297,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): with archive.m_context.raw_file(self.data_file) as file: xrd_dict = read_function(file.name, logger) write_function(xrd_dict, archive, logger) + self.hdf5_handler.write_file() + if self.hdf5_handler.data_file != self.auxiliary_file: + self.auxiliary_file = self.hdf5_handler.data_file super().normalize(archive, logger) - if not self.results: - return - - scan_type = xrd_dict.get('metadata', {}).get('scan_type', None) - if self.generate_nexus_file and self.data_file is not None: - write_nx_section_and_create_file(archive, logger, scan_type=scan_type) - - self.figures = self.results[0].generate_plots(archive, logger) class RawFileXRDData(EntryData): From 2d02036a17090df6670e0750b0e79a9bfdcad60d Mon Sep 17 00:00:00 2001 From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:33:03 +0100 Subject: [PATCH 17/41] Adding nexus in ref (#150) * Remove the Nexus file before regenerating it. * Reference to the NeXus entry. * PR review comments. 
--- src/nomad_measurements/utils.py | 146 +++++++++++++++++++++++++-- src/nomad_measurements/xrd/nx.py | 33 ++++++ src/nomad_measurements/xrd/schema.py | 20 ++++ 3 files changed, 189 insertions(+), 10 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 876d25b4..56066f63 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -16,6 +16,7 @@ # limitations under the License. # import collections +import copy import os.path import re from typing import ( @@ -30,6 +31,14 @@ from nomad.datamodel.hdf5 import HDF5Reference from nomad.units import ureg from pydantic import BaseModel, Field +from pynxtools.dataconverter.helpers import ( + generate_template_from_nxdl, + get_nxdl_root_and_path, +) +from pynxtools.dataconverter.template import Template +from pynxtools.dataconverter.writer import Writer as pynxtools_writer + +from nomad_measurements.xrd.nx import CONCEPT_MAP if TYPE_CHECKING: from nomad.datamodel.data import ( @@ -43,6 +52,10 @@ ) +class NXFileGenerationError(Exception): + pass + + def get_reference(upload_id: str, entry_id: str) -> str: return f'../uploads/{upload_id}/archive/{entry_id}#data' @@ -347,9 +360,13 @@ def write_file(self): except Exception as e: self.nexus = False self.logger.warning( - f'Encountered "{e}" error while creating nexus file. ' - 'Creating h5 file instead.' + f"""NeXusFileGenerationError: Encountered '{e}' error while creating + nexus file. Creating h5 file instead.""" ) + if self.archive.m_context.raw_path_exists(self.data_file): + os.remove( + os.path.join(self.archive.m_context.raw_path(), self.data_file) + ) self._write_hdf5_file() else: self._write_hdf5_file() @@ -360,14 +377,62 @@ def _write_nx_file(self): to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus view of the data in addition to storing array data. 
""" - if self.data_file.endswith('.h5'): - self.data_file = self.data_file.replace('.h5', '.nxs') - raise NotImplementedError('Method `write_nx_file` is not implemented.') - # TODO add archive data to `hdf5_data_dict` before creating the nexus file. Use - # `populate_hdf5_data_dict` method for each quantity that is needed in .nxs - # file. Create a NeXus file with the data in `hdf5_data_dict`. - # One issue here is as we populate the `hdf5_data_dict` with the archive data, - # we will always have to over write the nexus file + from nomad.processing.data import Entry + + app_def = 'NXxrd_pan' + nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def) + template = Template() + generate_template_from_nxdl(nxdl_root, template) + attr_dict = {} + dataset_dict = {} + self.populate_nx_dataset_and_attribute( + attr_dict=attr_dict, dataset_dict=dataset_dict + ) + for nx_path, dset_original in list(self._hdf5_datasets.items()) + list( + dataset_dict.items() + ): + dset = copy.deepcopy(dset_original) + if dset.internal_reference: + # convert to the nexus type link + dset.data = {'link': self._remove_nexus_annotations(dset.data)} + + try: + template[nx_path] = dset.data + except KeyError: + template['optional'][nx_path] = dset.data + + hdf5_path = self._remove_nexus_annotations(nx_path) + self._set_hdf5_reference( + self.archive, + dset.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{hdf5_path}', + ) + for nx_path, attr_d in list(self._hdf5_attributes.items()) + list( + attr_dict.items() + ): + for attr_k, attr_v in attr_d.items(): + if attr_v != 'dimensionless' and attr_v: + try: + template[f'{nx_path}/@{attr_k}'] = attr_v + except KeyError: + template['optional'][f'{nx_path}/@{attr_k}'] = attr_v + + nx_full_file_path = os.path.join( + self.archive.m_context.raw_path(), self.data_file + ) + + if self.archive.m_context.raw_path_exists(self.data_file): + os.remove(nx_full_file_path) + pynxtools_writer( + data=template, 
nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path + ).write() + + entry_list = Entry.objects( + upload_id=self.archive.m_context.upload_id, mainfile=self.data_file + ) + if not entry_list: + self.archive.m_context.process_updated_raw_file(self.data_file) def _write_hdf5_file(self): # noqa: PLR0912 """ @@ -435,6 +500,67 @@ def _write_hdf5_file(self): # noqa: PLR0912 self._hdf5_datasets = collections.OrderedDict() self._hdf5_attributes = collections.OrderedDict() + @staticmethod + def walk_through_object(parent_obj, attr_chain): + """ + Walk though the object until reach the leaf. + + Args: + parent_obj: This is a python obj. + e.g.Arvhive + attr_chain: Dot separated obj chain. + e.g. 'archive.data.xrd_settings.source.xray_tube_material' + default: A value to be returned by default, if not data is found. + """ + if parent_obj is None: + return parent_obj + + if isinstance(attr_chain, str) and attr_chain.startswith('archive.'): + parts = attr_chain.split('.') + child_obj = None + for part in parts[1:]: + child_nm = part + if '[' in child_nm: + child_nm, index = child_nm.split('[') + index = int(index[:-1]) + # section always exists + child_obj = getattr(parent_obj, child_nm)[index] + else: + child_obj = getattr(parent_obj, child_nm, None) + if child_obj is None: + return None + parent_obj = child_obj + + return child_obj + + def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict): + """Construct datasets and attributes for nexus and populate.""" + + for nx_path, arch_path in CONCEPT_MAP.items(): + if arch_path.startswith('archive.'): + data = self.walk_through_object(self.archive, arch_path) + else: + data = arch_path # default value + + dataset = DatasetModel( + data=data, + ) + + if ( + isinstance(data, pint.Quantity) + and str(data.units) != 'dimensionless' + and str(data.units) + ): + attr_tmp = {nx_path: dict(units=str(data.units))} + attr_dict |= attr_tmp + dataset.data = data.magnitude + + l_part, r_part = nx_path.split('/', 1) + 
if r_part.startswith('@'): + attr_dict[l_part] = {r_part.replace('@', ''): data} + else: + dataset_dict[nx_path] = dataset + @staticmethod def _remove_nexus_annotations(path: str) -> str: """ diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py index db73af2f..21474aea 100644 --- a/src/nomad_measurements/xrd/nx.py +++ b/src/nomad_measurements/xrd/nx.py @@ -37,3 +37,36 @@ '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', ] + + +CONCEPT_MAP = { + '/ENTRY[entry]/@default': 'experiment_result', + '/ENTRY[entry]/definition': 'NXxrd_pan', + '/ENTRY[entry]/method': 'archive.data.method', + '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', + '/ENTRY[entry]/experiment_result/@signal': 'intensity', + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': ( + 'archive.data.results[0].scan_axis' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': ( + 'archive.data.xrd_settings.source.xray_tube_material' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': ( + 'archive.data.xrd_settings.source.xray_tube_current' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': ( + 'archive.data.xrd_settings.source.xray_tube_voltage' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': ( + 'archive.data.xrd_settings.source.kalpha_one' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': ( + 'archive.data.xrd_settings.source.kalpha_two' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': ( + 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone' + ), + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': ( + 'archive.data.xrd_settings.source.kbeta' + ), +} diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 
80ec1dc3..7ba2f994 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # + from typing import ( TYPE_CHECKING, Any, @@ -73,6 +74,8 @@ from nomad_measurements.utils import ( HDF5Handler, get_bounding_range_2d, + get_entry_id_from_file_name, + get_reference, merge_sections, ) from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS @@ -1137,6 +1140,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData): diffraction_method_name.m_annotations['eln'] = ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ) + nexus_results = Quantity( + type=ArchiveSection, + description='Reference to the NeXus entry.', + a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity), + ) def get_read_write_functions(self) -> tuple[Callable, Callable]: """ @@ -1300,6 +1308,18 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): self.hdf5_handler.write_file() if self.hdf5_handler.data_file != self.auxiliary_file: self.auxiliary_file = self.hdf5_handler.data_file + + if archive.m_context.raw_path_exists( + self.auxiliary_file + ) and self.auxiliary_file.endswith('.nxs'): + nx_entry_id = get_entry_id_from_file_name( + archive=archive, file_name=self.auxiliary_file + ) + ref_to_nx_entry_data = get_reference( + archive.metadata.upload_id, nx_entry_id + ) + self.nexus_results = f'{ref_to_nx_entry_data}' + super().normalize(archive, logger) From 62569dd35ac9dc0ad8395c2eab2ab2afb95ead95 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Fri, 20 Dec 2024 17:29:38 +0100 Subject: [PATCH 18/41] Move common functionality to resolve_path --- src/nomad_measurements/utils.py | 101 +++++++++++++++----------------- 1 file changed, 46 insertions(+), 55 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 56066f63..1e707973 100644 --- 
a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -500,51 +500,16 @@ def _write_hdf5_file(self): # noqa: PLR0912 self._hdf5_datasets = collections.OrderedDict() self._hdf5_attributes = collections.OrderedDict() - @staticmethod - def walk_through_object(parent_obj, attr_chain): - """ - Walk though the object until reach the leaf. - - Args: - parent_obj: This is a python obj. - e.g.Arvhive - attr_chain: Dot separated obj chain. - e.g. 'archive.data.xrd_settings.source.xray_tube_material' - default: A value to be returned by default, if not data is found. - """ - if parent_obj is None: - return parent_obj - - if isinstance(attr_chain, str) and attr_chain.startswith('archive.'): - parts = attr_chain.split('.') - child_obj = None - for part in parts[1:]: - child_nm = part - if '[' in child_nm: - child_nm, index = child_nm.split('[') - index = int(index[:-1]) - # section always exists - child_obj = getattr(parent_obj, child_nm)[index] - else: - child_obj = getattr(parent_obj, child_nm, None) - if child_obj is None: - return None - parent_obj = child_obj - - return child_obj - def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict): """Construct datasets and attributes for nexus and populate.""" for nx_path, arch_path in CONCEPT_MAP.items(): if arch_path.startswith('archive.'): - data = self.walk_through_object(self.archive, arch_path) + data = resolve_path(self.archive, arch_path.split('archive.', 1)[1]) else: data = arch_path # default value - dataset = DatasetModel( - data=data, - ) + dataset = DatasetModel(data=data) if ( isinstance(data, pint.Quantity) @@ -593,33 +558,59 @@ def _set_hdf5_reference( section: 'ArchiveSection' = None, path: str = None, ref: str = None ): """ - Method for setting a HDF5Reference quantity in a section. It can handle - nested quantities and repeatable sections, provided that the quantity itself - is of type `HDF5Reference`. + Method for setting a HDF5Reference quantity in a section. 
For example, one can set the reference for a quantity path like `data.results[0].intensity`. + In case the section is not initialized, the method returns without setting + the reference. Args: section (Section): The NOMAD section containing the quantity. path (str): The path to the quantity. ref (str): The reference to the HDF5 dataset. """ - # TODO handle the case when section in the path is not initialized - if not section or not path or not ref: return - attr = section - path = path.split('.') - quantity_name = path.pop() - - for subpath in path: - if re.match(r'.*\[.*\]', subpath): - index = int(subpath.split('[')[1].split(']')[0]) - attr = attr.m_get(subpath.split('[')[0], index=index) - else: - attr = attr.m_get(subpath) - if isinstance( - attr.m_get_quantity_definition(quantity_name).type, HDF5Reference + section_path, quantity_name = path.rsplit('.', 1) + resolved_section = resolve_path(section, section_path) + + if resolved_section and isinstance( + resolved_section.m_get_quantity_definition(quantity_name).type, + HDF5Reference, ): - attr.m_set(quantity_name, ref) + resolved_section.m_set(quantity_name, ref) + + +def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None): + """ + Resolves the attribute path within the given NOMAD section. + + Args: + section (ArchiveSection): The NOMAD section. + path (str): The dot-separated path to the attribute. + logger (BoundLogger): A structlog logger. + + Returns: + The resolved section or attribute or None if not found. + """ + attr = section + parts = path.split('.') + try: + for part in parts: + attr_path = part + if re.match(r'.*\[.*\]', attr_path): + attr_path, index = part[:-1].split('[') + index = int(index) + else: + index = None + attr = attr.m_get(attr_path, index=index) + except (KeyError, ValueError, AttributeError) as e: + if logger: + logger.error( + f'Unable to resolve part "{part}" of the given path "{path}". ' + f'Encountered error "{e}".' 
+ ) + return None + + return attr From 6e47b5bd118f6dcaf52bda3d6c8b282800af9493 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 7 Jan 2025 16:26:24 +0100 Subject: [PATCH 19/41] Allow reading with dataset_path alone --- src/nomad_measurements/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 1e707973..11b0a8ff 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -269,7 +269,7 @@ def add_dataset( validate_path (bool): If True, the dataset path is validated. """ if not params: - self.logger.warning('Dataset `params` must be provided.') + self.logger.warning('Dataset `params` not provided.') return dataset = DatasetModel( @@ -309,7 +309,7 @@ def add_attribute( params (dict): The attributes to be added. """ if not params: - self.logger.warning('Attribute `params` must be provided.') + self.logger.warning('Attribute `params` not provided.') return self._hdf5_attributes[path] = params @@ -324,7 +324,10 @@ def read_dataset(self, path: str): """ if path is None: return - file_path, dataset_path = path.split('#') + if '#' not in path: + file_path, dataset_path = None, path + else: + file_path, dataset_path = path.rsplit('#', 1) # find path in the instance variables value = None @@ -336,6 +339,8 @@ def read_dataset(self, path: str): value *= ureg(units) return value + if not file_path: + return file_name = file_path.rsplit('/raw/', 1)[1] with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5: if dataset_path not in h5: From 56abe1e0ed0529918872a8be64b5bd8cc56db380 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 14 Jan 2025 12:06:46 +0100 Subject: [PATCH 20/41] Allow reading with archive paths --- src/nomad_measurements/utils.py | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 
11b0a8ff..8158e1df 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -243,6 +243,7 @@ def __init__( self._hdf5_datasets = collections.OrderedDict() self._hdf5_attributes = collections.OrderedDict() + self._hdf5_path_map = collections.OrderedDict() def add_dataset( self, @@ -294,6 +295,8 @@ def add_dataset( dataset.data = dataset.data.magnitude self._hdf5_datasets[path] = dataset + if dataset.archive_path: + self._hdf5_path_map[dataset.archive_path] = path def add_attribute( self, @@ -313,7 +316,7 @@ def add_attribute( return self._hdf5_attributes[path] = params - def read_dataset(self, path: str): + def read_dataset(self, path: str, is_archive_path: bool = False): """ Returns the dataset at the given path. If the quantity has `units` as an attribute, tries to returns a `pint.Quantity`. @@ -321,9 +324,14 @@ def read_dataset(self, path: str): Args: path (str): The dataset path in the HDF5 file. + is_archive_path (bool): If True, the path is resolved from the archive path. """ if path is None: return + if is_archive_path and path in self._hdf5_path_map: + path = self._hdf5_path_map[path] + if path is None: + return if '#' not in path: file_path, dataset_path = None, path else: @@ -339,20 +347,22 @@ def read_dataset(self, path: str): value *= ureg(units) return value - if not file_path: - return - file_name = file_path.rsplit('/raw/', 1)[1] - with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5: - if dataset_path not in h5: - self.logger.warning(f'Dataset "{dataset_path}" not found.') - else: - value = h5[dataset_path][...] 
- try: - units = h5[dataset_path].attrs['units'] - value *= ureg(units) - except KeyError: - pass - return value + # find path in the HDF5 file + if file_path: + file_name = file_path.rsplit('/raw/', 1)[1] + with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5: + if dataset_path not in h5: + self.logger.warning(f'Dataset "{dataset_path}" not found.') + else: + value = h5[dataset_path][...] + try: + units = h5[dataset_path].attrs['units'] + value *= ureg(units) + except KeyError: + pass + return value + + return None def write_file(self): """ From 3b3cde9cf801a93617b991e61a2a9bf484d29336 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 14 Jan 2025 12:07:19 +0100 Subject: [PATCH 21/41] Never reset the instance variables --- src/nomad_measurements/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 8158e1df..443a1a39 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -511,10 +511,6 @@ def _write_hdf5_file(self): # noqa: PLR0912 else: self.logger.warning(f'Path "{key}" not found to add attribute.') - # reset hdf5 datasets and atttributes - self._hdf5_datasets = collections.OrderedDict() - self._hdf5_attributes = collections.OrderedDict() - def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict): """Construct datasets and attributes for nexus and populate.""" From 162f5877e3f9cfc2419bb54fcc9844a9666f05a9 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 14 Jan 2025 12:08:02 +0100 Subject: [PATCH 22/41] Final file writing when the instance is deleted --- src/nomad_measurements/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 443a1a39..f8672226 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -592,6 +592,10 @@ def _set_hdf5_reference( ): resolved_section.m_set(quantity_name, ref) 
+ def __del__(self): + if self._hdf5_datasets or self._hdf5_attributes: + self.write_file() + def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None): """ From 9573636866a5c1dd9c0d8943b8b9de7593c51b70 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 14 Jan 2025 15:48:38 +0100 Subject: [PATCH 23/41] Reduce write_file calls to one --- src/nomad_measurements/xrd/schema.py | 168 ++++++++++++++++++--------- 1 file changed, 116 insertions(+), 52 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 7ba2f994..5b1089d8 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -290,6 +290,14 @@ class XRDResultPlotIntensity(ArchiveSection): type=HDF5Reference, description='The omega range of the diffractogram', ) + phi = Quantity( + type=HDF5Reference, + description='The phi range of the diffractogram', + ) + chi = Quantity( + type=HDF5Reference, + description='The chi range of the diffractogram', + ) def normalize(self, archive, logger): super().normalize(archive, logger) @@ -300,9 +308,6 @@ def normalize(self, archive, logger): except (AttributeError, AssertionError): return - if self.intensity is None or self.two_theta is None: - return - hdf5_handler.add_dataset( path=f'{prefix}/plot_intensity/two_theta', params=dict( @@ -329,8 +334,15 @@ def normalize(self, archive, logger): NX_class='NXdata', ), ) + if isinstance(self.m_parent, XRDResult1D): + return + for var_axis in ['omega', 'phi', 'chi']: - if self.get(var_axis) is not None: + var_axis_data = hdf5_handler.read_dataset( + path=f'data.results[0].{var_axis}', + is_archive_path=True, + ) + if var_axis_data is not None: hdf5_handler.add_dataset( path=f'{prefix}/plot_intensity/{var_axis}', params=dict( @@ -350,8 +362,6 @@ def normalize(self, archive, logger): ) break - hdf5_handler.write_file() - class XRDResultPlotIntensityScatteringVector(ArchiveSection): m_def = Section( @@ -388,10 +398,24 
@@ def normalize(self, archive, logger): except (AttributeError, AssertionError): return - if self.intensity is None: - return + intensity = hdf5_handler.read_dataset( + path='data.results[0].intensity', + is_archive_path=True, + ) + q_norm = hdf5_handler.read_dataset( + path='data.results[0].q_norm', + is_archive_path=True, + ) + q_parallel = hdf5_handler.read_dataset( + path='data.results[0].q_parallel', + is_archive_path=True, + ) + q_perpendicular = hdf5_handler.read_dataset( + path='data.results[0].q_perpendicular', + is_archive_path=True, + ) - if self.q_norm is not None: + if q_norm is not None: hdf5_handler.add_dataset( path=f'{prefix}/plot_intensity_scattering_vector/intensity', params=dict( @@ -418,10 +442,7 @@ def normalize(self, archive, logger): NX_class='NXdata', ), ) - elif self.q_parallel is not None and self.q_perpendicular is not None: - intensity = hdf5_handler.read_dataset(self.intensity) - q_parallel = hdf5_handler.read_dataset(self.q_parallel) - q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + elif q_parallel is not None and q_perpendicular is not None: # q_vectors lead to irregular grid # generate a regular grid using interpolation x = q_parallel.to('1/angstrom').magnitude.flatten() @@ -468,7 +489,6 @@ def normalize(self, archive, logger): NX_class='NXdata', ), ) - hdf5_handler.write_file() class XRDResult(MeasurementResult): @@ -547,8 +567,14 @@ def generate_plots(self): except (AttributeError, AssertionError): return plots - two_theta = hdf5_handler.read_dataset(self.two_theta) - intensity = hdf5_handler.read_dataset(self.intensity) + two_theta = hdf5_handler.read_dataset( + path='data.results[0].two_theta', + is_archive_path=True, + ) + intensity = hdf5_handler.read_dataset( + path='data.results[0].intensity', + is_archive_path=True, + ) if two_theta is None or intensity is None: return plots @@ -639,7 +665,10 @@ def generate_plots(self): ) ) - q_norm = hdf5_handler.read_dataset(self.q_norm) + q_norm = 
hdf5_handler.read_dataset( + path='data.results[0].q_norm', + is_archive_path=True, + ) if q_norm is None: return plots @@ -713,9 +742,22 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): except (AttributeError, AssertionError): return + intensity = hdf5_handler.read_dataset( + path='data.results[0].intensity', + is_archive_path=True, + ) + two_theta = hdf5_handler.read_dataset( + path='data.results[0].two_theta', + is_archive_path=True, + ) + if intensity is None or two_theta is None: + return + if self.source_peak_wavelength is not None: - q_norm = hdf5_handler.read_dataset(self.q_norm) - two_theta = hdf5_handler.read_dataset(self.two_theta) + q_norm = hdf5_handler.read_dataset( + path='data.results[0].q_norm', + is_archive_path=True, + ) q_norm, two_theta = calculate_two_theta_or_q( wavelength=self.source_peak_wavelength, two_theta=two_theta, @@ -735,15 +777,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): archive_path='data.results[0].two_theta', ), ) - hdf5_handler.write_file() self.m_setdefault('plot_intensity_scattering_vector') - self.plot_intensity_scattering_vector.intensity = self.intensity - self.plot_intensity_scattering_vector.q_norm = self.q_norm self.plot_intensity_scattering_vector.normalize(archive, logger) self.m_setdefault('plot_intensity') - self.plot_intensity.intensity = self.intensity - self.plot_intensity.two_theta = self.two_theta self.plot_intensity.normalize(archive, logger) @@ -781,9 +818,18 @@ def generate_plots(self): except (AttributeError, AssertionError): return plots - two_theta = hdf5_handler.read_dataset(self.two_theta) - intensity = hdf5_handler.read_dataset(self.intensity) - omega = hdf5_handler.read_dataset(self.omega) + two_theta = hdf5_handler.read_dataset( + path='data.results[0].two_theta', + is_archive_path=True, + ) + intensity = hdf5_handler.read_dataset( + path='data.results[0].intensity', + is_archive_path=True, + ) + omega = hdf5_handler.read_dataset( + 
path='data.results[0].omega', + is_archive_path=True, + ) if two_theta is None or intensity is None or omega is None: return plots @@ -859,8 +905,14 @@ def generate_plots(self): ) # Plot for RSM in Q-vectors - q_parallel = hdf5_handler.read_dataset(self.q_parallel) - q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + q_parallel = hdf5_handler.read_dataset( + path='data.results[0].q_parallel', + is_archive_path=True, + ) + q_perpendicular = hdf5_handler.read_dataset( + path='data.results[0].q_perpendicular', + is_archive_path=True, + ) if q_parallel is not None and q_perpendicular is not None: x = q_parallel.to('1/angstrom').magnitude.flatten() y = q_perpendicular.to('1/angstrom').magnitude.flatten() @@ -956,20 +1008,34 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): except (AttributeError, AssertionError): return + intensity = hdf5_handler.read_dataset( + path='data.results[0].intensity', + is_archive_path=True, + ) + two_theta = hdf5_handler.read_dataset( + path='data.results[0].two_theta', + is_archive_path=True, + ) var_axis = None for axis in ['omega', 'chi', 'phi']: - axis_value = hdf5_handler.read_dataset(getattr(self, axis)) + axis_value = hdf5_handler.read_dataset( + path=f'data.results[0].{axis}', + is_archive_path=True, + ) if axis_value is not None and len(np.unique(axis_value.magnitude)) > 1: var_axis = axis break + if intensity is None or two_theta is None or var_axis is None: + return - if self.source_peak_wavelength is not None and var_axis is not None: - two_theta = hdf5_handler.read_dataset(self.two_theta) - intensity = hdf5_handler.read_dataset(self.intensity) + if self.source_peak_wavelength is not None: q_parallel, q_perpendicular = calculate_q_vectors_rsm( wavelength=self.source_peak_wavelength, two_theta=two_theta * np.ones_like(intensity), - omega=hdf5_handler.read_dataset(getattr(self, var_axis)), + omega=hdf5_handler.read_dataset( + path=f'data.results[0].{var_axis}', + is_archive_path=True, + ), ) 
hdf5_handler.add_dataset( path='/ENTRY[entry]/experiment_result/q_parallel', @@ -985,19 +1051,11 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): archive_path='data.results[0].q_perpendicular', ), ) - hdf5_handler.write_file() self.m_setdefault('plot_intensity_scattering_vector') - self.plot_intensity_scattering_vector.intensity = self.intensity - self.plot_intensity_scattering_vector.q_parallel = self.q_parallel - self.plot_intensity_scattering_vector.q_perpendicular = self.q_perpendicular self.plot_intensity_scattering_vector.normalize(archive, logger) - if var_axis is not None: - self.m_setdefault('plot_intensity') - self.plot_intensity.intensity = self.intensity - self.plot_intensity.two_theta = self.two_theta - self.plot_intensity.m_set(var_axis, getattr(self, var_axis)) - self.plot_intensity.normalize(archive, logger) + self.m_setdefault('plot_intensity') + self.plot_intensity.normalize(archive, logger) class XRayDiffraction(Measurement): @@ -1079,10 +1137,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): if not archive.results.properties.structural: diffraction_patterns = [] for result in self.results: - intensity = hdf5_handler.read_dataset(result.intensity) + intensity = hdf5_handler.read_dataset( + 'data.results[0].intensity', is_archive_path=True + ) if len(intensity.shape) == 1: - two_theta = hdf5_handler.read_dataset(result.two_theta) - q_norm = hdf5_handler.read_dataset(result.q_norm) + two_theta = hdf5_handler.read_dataset( + 'data.results[0].two_theta', is_archive_path=True + ) + q_norm = hdf5_handler.read_dataset( + 'data.results[0].q_norm', is_archive_path=True + ) diffraction_patterns.append( DiffractionPattern( incident_beam_wavelength=result.source_peak_wavelength, @@ -1305,10 +1369,12 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): with archive.m_context.raw_file(self.data_file) as file: xrd_dict = read_function(file.name, logger) write_function(xrd_dict, archive, logger) 
- self.hdf5_handler.write_file() - if self.hdf5_handler.data_file != self.auxiliary_file: - self.auxiliary_file = self.hdf5_handler.data_file + super().normalize(archive, logger) + + self.hdf5_handler.write_file() + if self.hdf5_handler.data_file != self.auxiliary_file: + self.auxiliary_file = self.hdf5_handler.data_file if archive.m_context.raw_path_exists( self.auxiliary_file ) and self.auxiliary_file.endswith('.nxs'): @@ -1320,8 +1386,6 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) self.nexus_results = f'{ref_to_nx_entry_data}' - super().normalize(archive, logger) - class RawFileXRDData(EntryData): """ From 369dc72bee09dd23a4a35e0a04bedad78647463e Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Wed, 15 Jan 2025 11:33:03 +0100 Subject: [PATCH 24/41] Fix: set hard links for internal ref --- src/nomad_measurements/utils.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index f8672226..44fb6859 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -474,10 +474,11 @@ def _write_hdf5_file(self): # noqa: PLR0912 self.archive.m_context.raw_file(self.data_file, mode), 'a' ) as h5: for key, value in self._hdf5_datasets.items(): - if value.data is None: + data = value.data + if data is None: self.logger.warning(f'No data found for "{key}". Skipping.') continue - elif value.internal_reference: + if value.internal_reference: # resolve the internal reference try: data = h5[self._remove_nexus_annotations(value.data)] @@ -486,15 +487,19 @@ def _write_hdf5_file(self): # noqa: PLR0912 f'Internal reference "{value.data}" not found. Skipping.' ) continue - else: - data = value.data group_name, dataset_name = key.rsplit('/', 1) group = h5.require_group(group_name) if key in h5: - group[dataset_name][...] 
= data + # remove the existing dataset if any + del h5[key] + + if value.internal_reference: + # create a hard link to the existing dataset + group[dataset_name] = data else: + # create the dataset group.create_dataset( name=dataset_name, data=data, From aa0277c384810d49fdb46c511d269a4782e46201 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Wed, 15 Jan 2025 11:56:20 +0100 Subject: [PATCH 25/41] remove del method; fix test --- src/nomad_measurements/utils.py | 4 ---- tests/test_xrd.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 44fb6859..6722e186 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -597,10 +597,6 @@ def _set_hdf5_reference( ): resolved_section.m_set(quantity_name, ref) - def __del__(self): - if self._hdf5_datasets or self._hdf5_attributes: - self.write_file() - def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None): """ diff --git a/tests/test_xrd.py b/tests/test_xrd.py index 124be398..4aa061d6 100644 --- a/tests/test_xrd.py +++ b/tests/test_xrd.py @@ -17,6 +17,7 @@ # import pytest from nomad.client import normalize_all +from nomad_measurements.xrd.schema import XRDResult1D test_files = [ 'tests/data/xrd/XRD-918-16_10.xrdml', @@ -52,7 +53,7 @@ def test_normalize_all(parsed_measurement_archive, caplog): assert parsed_measurement_archive.data.results[ 0 ].source_peak_wavelength.magnitude == pytest.approx(1.540598, 1e-2) - if len(parsed_measurement_archive.data.results[0].intensity.shape) == 1: + if isinstance(parsed_measurement_archive.data.results[0], XRDResult1D): assert ( parsed_measurement_archive.results.properties.structural.diffraction_pattern[ 0 From a71db65b9e70292858798d39046f819dc42f29c9 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 16 Jan 2025 11:35:30 +0100 Subject: [PATCH 26/41] Combine nexus dataset map --- src/nomad_measurements/utils.py | 23 ++++++++-------- 
src/nomad_measurements/xrd/nx.py | 41 ++++++++++++---------------- src/nomad_measurements/xrd/schema.py | 5 ++-- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 6722e186..06ea941b 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -38,8 +38,6 @@ from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.writer import Writer as pynxtools_writer -from nomad_measurements.xrd.nx import CONCEPT_MAP - if TYPE_CHECKING: from nomad.datamodel.data import ( ArchiveSection, @@ -217,8 +215,7 @@ def __init__( filename: str, archive: 'EntryArchive', logger: 'BoundLogger', - valid_dataset_paths: list = None, - nexus: bool = False, + nexus_dataset_map: dict = None, ): """ Initialize the handler. @@ -227,8 +224,8 @@ def __init__( filename (str): The name of the auxiliary file. archive (EntryArchive): The NOMAD archive. logger (BoundLogger): A structlog logger. - valid_dataset_paths (list): The list of valid dataset paths. - nexus (bool): If True, the file is created as a NeXus file. + nexus_dataset_map (dict): The NeXus dataset map containing the nexus file + dataset paths and the corresponding archive paths. 
""" if not filename.endswith(('.nxs', '.h5')): raise ValueError('Only .h5 or .nxs files are supported.') @@ -236,10 +233,12 @@ def __init__( self.data_file = filename self.archive = archive self.logger = logger - self.valid_dataset_paths = [] - if valid_dataset_paths: - self.valid_dataset_paths = valid_dataset_paths - self.nexus = nexus + + self.nexus = True if nexus_dataset_map else False + self.nexus_dataset_map = nexus_dataset_map + self.valid_dataset_paths = ( + list(nexus_dataset_map.keys()) if nexus_dataset_map else [] + ) self._hdf5_datasets = collections.OrderedDict() self._hdf5_attributes = collections.OrderedDict() @@ -519,7 +518,9 @@ def _write_hdf5_file(self): # noqa: PLR0912 def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict): """Construct datasets and attributes for nexus and populate.""" - for nx_path, arch_path in CONCEPT_MAP.items(): + for nx_path, arch_path in self.nexus_dataset_map.items(): + if nx_path in self._hdf5_datasets or nx_path in self._hdf5_attributes: + continue if arch_path.startswith('archive.'): data = resolve_path(self.archive, arch_path.split('archive.', 1)[1]) else: diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py index 21474aea..568b60dd 100644 --- a/src/nomad_measurements/xrd/nx.py +++ b/src/nomad_measurements/xrd/nx.py @@ -16,35 +16,30 @@ # limitations under the License. 
# -NEXUS_DATASET_PATHS = [ - '/ENTRY[entry]/experiment_result/intensity', - '/ENTRY[entry]/experiment_result/two_theta', - '/ENTRY[entry]/experiment_result/omega', - '/ENTRY[entry]/experiment_result/chi', - '/ENTRY[entry]/experiment_result/phi', - '/ENTRY[entry]/experiment_config/count_time', - '/ENTRY[entry]/experiment_result/q_norm', - '/ENTRY[entry]/experiment_result/q_parallel', - '/ENTRY[entry]/experiment_result/q_perpendicular', - '/ENTRY[entry]/method', - '/ENTRY[entry]/measurement_type', - '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', -] +""" +The following connects the nexus file paths to the archive paths. +The nexus file paths come from the nexus_definitions available at: +https://github.com/FAIRmat-NFDI/nexus_definitions/ in the following file: +`contributed_definitions/NXxrd_pan.nxdl.xml`. +The archive paths are the paths in the NOMAD archive defined in the class: +`nomad_measurement.xrd.schema.ELNXRayDiffraction`. 
+""" - -CONCEPT_MAP = { +NEXUS_DATASET_MAP = { '/ENTRY[entry]/@default': 'experiment_result', '/ENTRY[entry]/definition': 'NXxrd_pan', + '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity', + '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta', + '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega', + '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi', + '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi', + '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm', + '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel', + '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular', '/ENTRY[entry]/method': 'archive.data.method', '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', '/ENTRY[entry]/experiment_result/@signal': 'intensity', + '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time', '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': ( 'archive.data.results[0].scan_axis' ), diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 5b1089d8..40e106bf 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -78,7 +78,7 @@ get_reference, merge_sections, ) -from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS +from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP if TYPE_CHECKING: from nomad.datamodel.datamodel import ( @@ -1357,8 +1357,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): filename=self.auxiliary_file, archive=archive, logger=logger, - valid_dataset_paths=NEXUS_DATASET_PATHS, - nexus=True, + nexus_dataset_map=NEXUS_DATASET_MAP, ) read_function, write_function = self.get_read_write_functions() if read_function is None or write_function is None: From 
0fed11ac79626cbec92e9c37c2b6a61404800c4b Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 16 Jan 2025 11:43:42 +0100 Subject: [PATCH 27/41] Ruff --- src/nomad_measurements/xrd/nx.py | 4 +++- tests/test_xrd.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py index 568b60dd..9b0d6944 100644 --- a/src/nomad_measurements/xrd/nx.py +++ b/src/nomad_measurements/xrd/nx.py @@ -35,7 +35,9 @@ '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi', '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm', '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel', - '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular', + '/ENTRY[entry]/experiment_result/q_perpendicular': ( + 'archive.data.results[0].q_perpendicular' + ), '/ENTRY[entry]/method': 'archive.data.method', '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', '/ENTRY[entry]/experiment_result/@signal': 'intensity', diff --git a/tests/test_xrd.py b/tests/test_xrd.py index 4aa061d6..5ebede27 100644 --- a/tests/test_xrd.py +++ b/tests/test_xrd.py @@ -17,6 +17,7 @@ # import pytest from nomad.client import normalize_all + from nomad_measurements.xrd.schema import XRDResult1D test_files = [ From 0a055cc3c35300d1deb3af0ebee9ab32f0bbdb0b Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 16 Jan 2025 17:21:44 +0100 Subject: [PATCH 28/41] Make Auxiliary file name without raw file ext --- src/nomad_measurements/xrd/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 40e106bf..5501113a 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -1352,7 +1352,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): """ self.backward_compatibility() if 
self.data_file is not None: - self.auxiliary_file = f'{self.data_file}.nxs' + self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs' self.hdf5_handler = HDF5Handler( filename=self.auxiliary_file, archive=archive, From 22662372b2b19e8e8593a7d1b1a907326c269c46 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 16 Jan 2025 17:22:35 +0100 Subject: [PATCH 29/41] Add cleanup extensions for fixture --- tests/conftest.py | 13 ++++++++++--- tests/test_transmission.py | 9 +++++++-- tests/test_xrd.py | 10 +++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 975b2f47..e6b40201 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -66,8 +66,11 @@ def fixture_parsed_measurement_archive(request): file created by plugin parsers for the measurement data. Parsing this `.archive.json` file returns the `EntryArchive` object for the measurement data, which is finally yeilded to the test function. + request.param[0] is the relative path to the data file. + request.param[1] is a list of file extensions that need to be cleaned up after + the test. 
""" - rel_file_path = request.param + rel_file_path = request.param[0] file_archive = parse(rel_file_path)[0] rel_measurement_archive_path = os.path.join( @@ -79,5 +82,9 @@ def fixture_parsed_measurement_archive(request): yield parse(rel_measurement_archive_path)[0] - if os.path.exists(rel_measurement_archive_path): - os.remove(rel_measurement_archive_path) + # clean up + clean_up_extensions = request.param[1] + for ext in clean_up_extensions: + path = os.path.join(rel_file_path.rsplit('.', 1)[0] + ext) + if os.path.exists(path): + os.remove(path) diff --git a/tests/test_transmission.py b/tests/test_transmission.py index a22e65d6..f9e86330 100644 --- a/tests/test_transmission.py +++ b/tests/test_transmission.py @@ -26,11 +26,16 @@ 'tests/data/transmission/sphere_test01.Probe.Raw.asc', ] log_levels = ['error', 'critical'] +clean_up_extensions = ['.archive.json'] @pytest.mark.parametrize( 'parsed_measurement_archive, caplog', - [(file, log_level) for file in test_files for log_level in log_levels], + [ + ((file, clean_up_extensions), log_level) + for file in test_files + for log_level in log_levels + ], indirect=True, ) def test_normalize_all(parsed_measurement_archive, caplog): @@ -46,7 +51,7 @@ def test_normalize_all(parsed_measurement_archive, caplog): @pytest.mark.parametrize( 'parsed_measurement_archive, caplog', - [(test_files[0], log_level) for log_level in log_levels], + [((test_files[0], clean_up_extensions), log_level) for log_level in log_levels], indirect=True, ) def test_normalized_data(parsed_measurement_archive, caplog): diff --git a/tests/test_xrd.py b/tests/test_xrd.py index 5ebede27..6b65d69e 100644 --- a/tests/test_xrd.py +++ b/tests/test_xrd.py @@ -31,11 +31,19 @@ 'tests/data/xrd/TwoTheta_scan_powder.rasx', ] log_levels = ['error', 'critical'] +clean_up_extensions = ['.archive.json', '.nxs', '.h5'] @pytest.mark.parametrize( 'parsed_measurement_archive, caplog', - [(file, log_level) for file in test_files for log_level in log_levels], + [ + ( + 
(file, clean_up_extensions), + log_level, + ) + for file in test_files + for log_level in log_levels + ], indirect=True, ) def test_normalize_all(parsed_measurement_archive, caplog): From f484fd8fb898f2254c548cb3fd83448738e4598c Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 16 Jan 2025 18:13:04 +0100 Subject: [PATCH 30/41] Use bool in ELN to control raw file updation --- src/nomad_measurements/xrd/schema.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 5501113a..e54c5679 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -1196,6 +1196,14 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData): component=ELNComponentEnum.FileEditQuantity, ), ) + overwrite_auxiliary_file = Quantity( + type=bool, + default=True, + description='Overwrite the auxiliary file with the current data.', + a_eln=ELNAnnotation( + component=ELNComponentEnum.BoolEditQuantity, + ), + ) hdf5_handler = None measurement_identifiers = SubSection( section_def=ReadableIdentifiers, @@ -1371,12 +1379,14 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) - self.hdf5_handler.write_file() + if self.overwrite_auxiliary_file: + self.hdf5_handler.write_file() + self.overwrite_auxiliary_file = False if self.hdf5_handler.data_file != self.auxiliary_file: self.auxiliary_file = self.hdf5_handler.data_file - if archive.m_context.raw_path_exists( - self.auxiliary_file - ) and self.auxiliary_file.endswith('.nxs'): + + self.nexus_results = None + if self.auxiliary_file.endswith('.nxs'): nx_entry_id = get_entry_id_from_file_name( archive=archive, file_name=self.auxiliary_file ) From 05df712901de395267f2c4fbf8980709eac6f0af Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Mon, 20 Jan 2025 14:36:05 +0100 Subject: [PATCH 31/41] Remove 'file and entry deletion' --- 
src/nomad_measurements/utils.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 06ea941b..ba578bd5 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -377,10 +377,6 @@ def write_file(self): f"""NeXusFileGenerationError: Encountered '{e}' error while creating nexus file. Creating h5 file instead.""" ) - if self.archive.m_context.raw_path_exists(self.data_file): - os.remove( - os.path.join(self.archive.m_context.raw_path(), self.data_file) - ) self._write_hdf5_file() else: self._write_hdf5_file() @@ -436,8 +432,6 @@ def _write_nx_file(self): self.archive.m_context.raw_path(), self.data_file ) - if self.archive.m_context.raw_path_exists(self.data_file): - os.remove(nx_full_file_path) pynxtools_writer( data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path ).write() From c6c891d225e05fe89e97d5a0efbf979721961217 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Mon, 20 Jan 2025 14:43:37 +0100 Subject: [PATCH 32/41] remove defaults: trigger write if file is missing --- src/nomad_measurements/xrd/schema.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index e54c5679..e1cc8afe 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -1198,7 +1198,6 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData): ) overwrite_auxiliary_file = Quantity( type=bool, - default=True, description='Overwrite the auxiliary file with the current data.', a_eln=ELNAnnotation( component=ELNComponentEnum.BoolEditQuantity, @@ -1379,11 +1378,13 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) - if self.overwrite_auxiliary_file: + if self.overwrite_auxiliary_file or not archive.m_context.raw_path_exists( + self.auxiliary_file + ): self.hdf5_handler.write_file() 
self.overwrite_auxiliary_file = False - if self.hdf5_handler.data_file != self.auxiliary_file: - self.auxiliary_file = self.hdf5_handler.data_file + if self.hdf5_handler.data_file != self.auxiliary_file: + self.auxiliary_file = self.hdf5_handler.data_file self.nexus_results = None if self.auxiliary_file.endswith('.nxs'): From 75374660f58e8c4dad76caf403b4e5bb250217a6 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Mon, 20 Jan 2025 14:44:23 +0100 Subject: [PATCH 33/41] Minor --- src/nomad_measurements/xrd/schema.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index e1cc8afe..f9f63be3 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -1132,7 +1132,8 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): try: hdf5_handler = self.hdf5_handler - except AttributeError: + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): return if not archive.results.properties.structural: diffraction_patterns = [] @@ -1203,7 +1204,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData): component=ELNComponentEnum.BoolEditQuantity, ), ) - hdf5_handler = None + nexus_results = Quantity( + type=ArchiveSection, + description='Reference to the NeXus entry.', + a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity), + ) measurement_identifiers = SubSection( section_def=ReadableIdentifiers, ) @@ -1211,11 +1216,7 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData): diffraction_method_name.m_annotations['eln'] = ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ) - nexus_results = Quantity( - type=ArchiveSection, - description='Reference to the NeXus entry.', - a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity), - ) + hdf5_handler = None def get_read_write_functions(self) -> tuple[Callable, Callable]: """ From 
5fc0a8d886d5f1a1b860d473915a07a705cde6f0 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 21 Jan 2025 10:40:01 +0100 Subject: [PATCH 34/41] Set hdf5 references at add_dataset stage --- src/nomad_measurements/utils.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index ba578bd5..c64b237c 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -296,6 +296,12 @@ def add_dataset( self._hdf5_datasets[path] = dataset if dataset.archive_path: self._hdf5_path_map[dataset.archive_path] = path + self._set_hdf5_reference( + self.archive, + dataset.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{self._remove_nexus_annotations(path)}', + ) def add_attribute( self, @@ -411,13 +417,6 @@ def _write_nx_file(self): except KeyError: template['optional'][nx_path] = dset.data - hdf5_path = self._remove_nexus_annotations(nx_path) - self._set_hdf5_reference( - self.archive, - dset.archive_path, - f'/uploads/{self.archive.m_context.upload_id}/raw' - f'/{self.data_file}#{hdf5_path}', - ) for nx_path, attr_d in list(self._hdf5_attributes.items()) + list( attr_dict.items() ): @@ -497,12 +496,6 @@ def _write_hdf5_file(self): # noqa: PLR0912 name=dataset_name, data=data, ) - self._set_hdf5_reference( - self.archive, - value.archive_path, - f'/uploads/{self.archive.m_context.upload_id}/raw' - f'/{self.data_file}#{key}', - ) for key, value in self._hdf5_attributes.items(): if key in h5: h5[key].attrs.update(value) From 49c6f41f13947a54bb35f141815e07f35bc9de03 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Tue, 21 Jan 2025 10:53:35 +0100 Subject: [PATCH 35/41] Reset on trigger for main branch PR only --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 49d80603..ece0b411 100644 --- 
a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -7,7 +7,7 @@ on: push: branches: [ "main" ] pull_request: - branches: [ "*" ] + branches: [ "main" ] permissions: contents: read From af89f3b7fa7fd7fa53a8942b4d02c4e0b683c73e Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 23 Jan 2025 16:05:56 +0100 Subject: [PATCH 36/41] abstract out set hdf5 ref --- src/nomad_measurements/utils.py | 23 ++++++++++++++++------- src/nomad_measurements/xrd/schema.py | 2 ++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index c64b237c..3ca87437 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -296,12 +296,6 @@ def add_dataset( self._hdf5_datasets[path] = dataset if dataset.archive_path: self._hdf5_path_map[dataset.archive_path] = path - self._set_hdf5_reference( - self.archive, - dataset.archive_path, - f'/uploads/{self.archive.m_context.upload_id}/raw' - f'/{self.data_file}#{self._remove_nexus_annotations(path)}', - ) def add_attribute( self, @@ -387,6 +381,8 @@ def write_file(self): else: self._write_hdf5_file() + self.set_hdf5_references() + def _write_nx_file(self): """ Method for creating a NeXus file. Additional data from the archive is added @@ -502,6 +498,20 @@ def _write_hdf5_file(self): # noqa: PLR0912 else: self.logger.warning(f'Path "{key}" not found to add attribute.') + def set_hdf5_references(self): + """ + Method for adding the HDF5 references to the archive quantities. 
+ """ + for key, value in self._hdf5_datasets.items(): + if value.archive_path: + reference = self._remove_nexus_annotations(key) + self._set_hdf5_reference( + self.archive, + value.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{reference}', + ) + def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict): """Construct datasets and attributes for nexus and populate.""" @@ -554,7 +564,6 @@ def _remove_nexus_annotations(path: str) -> str: new_path += '/' + part.split('[')[0].strip().lower() else: new_path += '/' + part - new_path = new_path.replace('.nxs', '.h5') return new_path @staticmethod diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index f9f63be3..5c2dd353 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -1386,6 +1386,8 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): self.overwrite_auxiliary_file = False if self.hdf5_handler.data_file != self.auxiliary_file: self.auxiliary_file = self.hdf5_handler.data_file + else: + self.hdf5_handler.set_hdf5_references() self.nexus_results = None if self.auxiliary_file.endswith('.nxs'): From 965a91aa8ede5df1bc49b042d260efb4680a81a0 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 23 Jan 2025 16:09:26 +0100 Subject: [PATCH 37/41] Comment out nexus, TODOs, docstrings --- src/nomad_measurements/xrd/schema.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index 5c2dd353..f2ece071 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -273,6 +273,12 @@ class XRDSettings(ArchiveSection): class XRDResultPlotIntensity(ArchiveSection): + """ + Section for plotting the intensity over 2-theta. A separate sub-section allows to + create a separate group in `.h5` file. 
Attributes are added to the group to generate + the plot. + """ + m_def = Section( a_h5web=H5WebAnnotation( axes=['two_theta', 'omega', 'phi', 'chi'], signal='intensity' @@ -364,6 +370,12 @@ def normalize(self, archive, logger): class XRDResultPlotIntensityScatteringVector(ArchiveSection): + """ + Section for plotting the intensity over scattering vector. A separate sub-section + allows to create a separate group in `.h5` file. Attributes are added to the group + to generate the plot. + """ + m_def = Section( a_h5web=H5WebAnnotation( axes=['q_parallel', 'q_perpendicular', 'q_norm'], signal='intensity' @@ -1360,12 +1372,17 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): """ self.backward_compatibility() if self.data_file is not None: - self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs' + # TODO (ka-sarthak): use .nxs file once updating the flag through the + # normalizer works. + # self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs' + self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.h5' self.hdf5_handler = HDF5Handler( filename=self.auxiliary_file, archive=archive, logger=logger, - nexus_dataset_map=NEXUS_DATASET_MAP, + # TODO (ka-sarthak): use nexus dataset map once updating the flag + # through the normalizer works. + # nexus_dataset_map=NEXUS_DATASET_MAP, ) read_function, write_function = self.get_read_write_functions() if read_function is None or write_function is None: @@ -1383,9 +1400,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): self.auxiliary_file ): self.hdf5_handler.write_file() - self.overwrite_auxiliary_file = False if self.hdf5_handler.data_file != self.auxiliary_file: self.auxiliary_file = self.hdf5_handler.data_file + # TODO (ka-sarthak): update the flag through the normalizer once it works. 
+ # self.overwrite_auxiliary_file = False else: self.hdf5_handler.set_hdf5_references() From 5308370947fd0f9043cdc6e8fd6a37a631575f84 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 23 Jan 2025 16:58:09 +0100 Subject: [PATCH 38/41] Reprocess nxs entry --- src/nomad_measurements/utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 3ca87437..5f51ab31 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -389,7 +389,6 @@ def _write_nx_file(self): to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus view of the data in addition to storing array data. """ - from nomad.processing.data import Entry app_def = 'NXxrd_pan' nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def) @@ -430,12 +429,9 @@ def _write_nx_file(self): pynxtools_writer( data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path ).write() - - entry_list = Entry.objects( - upload_id=self.archive.m_context.upload_id, mainfile=self.data_file + self.archive.m_context.process_updated_raw_file( + self.data_file, allow_modify=True ) - if not entry_list: - self.archive.m_context.process_updated_raw_file(self.data_file) def _write_hdf5_file(self): # noqa: PLR0912 """ From 1682c9413c2e3fd1ba190a904e365e7cfa81d971 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 23 Jan 2025 17:35:47 +0100 Subject: [PATCH 39/41] Handle missing dataset in add step --- src/nomad_measurements/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 5f51ab31..ad952ed1 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -269,18 +269,21 @@ def add_dataset( validate_path (bool): If True, the dataset path is validated. 
""" if not params: - self.logger.warning('Dataset `params` not provided.') + self.logger.warning(f'No params provided for path "{path}". Skipping.') return dataset = DatasetModel( **params, ) + if dataset.data is None: + self.logger.warning(f'No data provided for the path "{path}". Skipping.') + return if ( validate_path and self.valid_dataset_paths and path not in self.valid_dataset_paths ): - self.logger.warning(f'Invalid dataset path "{path}".') + self.logger.warning(f'Invalid dataset path "{path}". Skipping.') return # handle the pint.Quantity and add data @@ -311,7 +314,7 @@ def add_attribute( params (dict): The attributes to be added. """ if not params: - self.logger.warning('Attribute `params` not provided.') + self.logger.warning(f'No params provided for attribute {path}.') return self._hdf5_attributes[path] = params @@ -459,9 +462,6 @@ def _write_hdf5_file(self): # noqa: PLR0912 ) as h5: for key, value in self._hdf5_datasets.items(): data = value.data - if data is None: - self.logger.warning(f'No data found for "{key}". 
Skipping.') - continue if value.internal_reference: # resolve the internal reference try: From 96bdf07621f3e2f36467e5d2e1433755585623af Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Thu, 23 Jan 2025 17:51:02 +0100 Subject: [PATCH 40/41] Comment out import --- src/nomad_measurements/xrd/schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index f2ece071..df2e7e67 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -78,7 +78,8 @@ get_reference, merge_sections, ) -from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP + +# from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP if TYPE_CHECKING: from nomad.datamodel.datamodel import ( From ab1081dccd788bd79d5bffcb1f16029bc65d7d34 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Fri, 24 Jan 2025 14:28:01 +0100 Subject: [PATCH 41/41] Review: sourcery --- src/nomad_measurements/utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index ad952ed1..ab4def11 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -234,7 +234,7 @@ def __init__( self.archive = archive self.logger = logger - self.nexus = True if nexus_dataset_map else False + self.nexus = bool(nexus_dataset_map) self.nexus_dataset_map = nexus_dataset_map self.valid_dataset_paths = ( list(nexus_dataset_map.keys()) if nexus_dataset_map else [] @@ -344,8 +344,7 @@ def read_dataset(self, path: str, is_archive_path: bool = False): if dataset_path in self._hdf5_datasets: value = self._hdf5_datasets[dataset_path].data if dataset_path in self._hdf5_attributes: - units = self._hdf5_attributes[dataset_path].get('units') - if units: + if units := self._hdf5_attributes[dataset_path].get('units'): value *= ureg(units) return value @@ -554,13 +553,14 @@ def 
_remove_nexus_annotations(path: str) -> str: return path pattern = r'.*\[.*\]' - new_path = '' - for part in path.split('/')[1:]: - if re.match(pattern, part): - new_path += '/' + part.split('[')[0].strip().lower() - else: - new_path += '/' + part - return new_path + return ''.join( + ( + '/' + part.split('[')[0].strip().lower() + if re.match(pattern, part) + else f'/{part}' + ) + for part in path.split('/')[1:] + ) @staticmethod def _set_hdf5_reference(