From 898e3fe8e8b2e914a2e2c4e9c4ee75bdea28dbd3 Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Mon, 19 Aug 2024 15:45:37 +0200
Subject: [PATCH 01/41] updated plugin structure

---
 MANIFEST.in                                       |  1 +
 src/nomad_measurements/general/__init__.py        | 15 +++++++++++++++
 .../{__init__.py => general/schema.py}            |  0
 3 files changed, 16 insertions(+)
 create mode 100644 src/nomad_measurements/general/__init__.py
 rename src/nomad_measurements/{__init__.py => general/schema.py} (100%)

diff --git a/MANIFEST.in b/MANIFEST.in
index e69de29b..b5ccc2d3 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include * nomad_plugin.yaml
diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
new file mode 100644
index 00000000..7e990a5d
--- /dev/null
+++ b/src/nomad_measurements/general/__init__.py
@@ -0,0 +1,15 @@
+
+from nomad.config.models.plugins import SchemaPackageEntryPoint
+
+class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint):
+
+    def load(self):
+        from nomad_measurements.general.schema import m_package
+
+        return m_package
+
+
+general_schema = GeneralSchemaPackageEntryPoint(
+    name='GeneralSchema',
+    description='Schema package defined using the new plugin mechanism.',
+)
diff --git a/src/nomad_measurements/__init__.py b/src/nomad_measurements/general/schema.py
similarity index 100%
rename from src/nomad_measurements/__init__.py
rename to src/nomad_measurements/general/schema.py

From 2f81aa5ed4958ac80b32538cd9f9abb8416e343e Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Mon, 19 Aug 2024 16:01:02 +0200
Subject: [PATCH 02/41] added pynxtools dependency

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2a9392d6..cbb1a390 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,9 +35,10 @@ maintainers = [
 ]
 license = { file = "LICENSE" }
 dependencies = [
-    "nomad-lab>=1.3.6", 
+    "nomad-lab>=1.3.6",
     "xmltodict==0.13.0",
     "fairmat-readers-xrd>=0.0.3",
+    "pynxtools@git+https://github.com/FAIRmat-NFDI/pynxtools.git@master",
     "nomad-material-processing",
     "fairmat-readers-transmission",
 ]

From 103076bd955fce936ab2c7db49bd6f6c7b4bd7fa Mon Sep 17 00:00:00 2001
From: Andrea Albino <95371554+aalbino2@users.noreply.github.com>
Date: Tue, 20 Aug 2024 12:51:52 +0200
Subject: [PATCH 03/41] Apply suggestions from Sarthak's code review

Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>
---
 src/nomad_measurements/general/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
index 7e990a5d..6b2a9d9c 100644
--- a/src/nomad_measurements/general/__init__.py
+++ b/src/nomad_measurements/general/__init__.py
@@ -9,7 +9,7 @@ def load(self):
         return m_package
 
 
-general_schema = GeneralSchemaPackageEntryPoint(
+schema = GeneralSchemaPackageEntryPoint(
     name='GeneralSchema',
     description='Schema package defined using the new plugin mechanism.',
 )

From a42a70745b927a1a121ed74588f819ed0e92ff56 Mon Sep 17 00:00:00 2001
From: aalbino2 <andrea.albino@hotmail.it>
Date: Tue, 20 Aug 2024 14:25:26 +0200
Subject: [PATCH 04/41] ruff linting

---
 src/nomad_measurements/general/__init__.py | 1 +
 src/nomad_measurements/xrd/__init__.py     | 1 +
 src/nomad_measurements/xrd/schema.py       | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
index 6b2a9d9c..d547c857 100644
--- a/src/nomad_measurements/general/__init__.py
+++ b/src/nomad_measurements/general/__init__.py
@@ -1,6 +1,7 @@
 
 from nomad.config.models.plugins import SchemaPackageEntryPoint
 
+
 class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint):
 
     def load(self):
diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py
index a088a3d4..be4f237f 100644
--- a/src/nomad_measurements/xrd/__init__.py
+++ b/src/nomad_measurements/xrd/__init__.py
@@ -1,6 +1,7 @@
 from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint
 
 
+
 class XRDSchemaPackageEntryPoint(SchemaPackageEntryPoint):
     def load(self):
         from nomad_measurements.xrd.schema import m_package
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index c6a2e3df..d12fb8eb 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -19,6 +19,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    Dict,
 )
 
 import numpy as np

From 48be124ee5dbaa0b7880758e422a9ff2684348c8 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 20 Aug 2024 15:31:31 +0200
Subject: [PATCH 05/41] Ruff linting 2

---
 src/nomad_measurements/general/__init__.py | 2 --
 src/nomad_measurements/xrd/__init__.py     | 1 -
 src/nomad_measurements/xrd/schema.py       | 1 -
 3 files changed, 4 deletions(-)

diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
index d547c857..853e1803 100644
--- a/src/nomad_measurements/general/__init__.py
+++ b/src/nomad_measurements/general/__init__.py
@@ -1,9 +1,7 @@
-
 from nomad.config.models.plugins import SchemaPackageEntryPoint
 
 
 class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint):
-
     def load(self):
         from nomad_measurements.general.schema import m_package
 
diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py
index be4f237f..a088a3d4 100644
--- a/src/nomad_measurements/xrd/__init__.py
+++ b/src/nomad_measurements/xrd/__init__.py
@@ -1,7 +1,6 @@
 from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint
 
 
-
 class XRDSchemaPackageEntryPoint(SchemaPackageEntryPoint):
     def load(self):
         from nomad_measurements.xrd.schema import m_package
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index d12fb8eb..c6a2e3df 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -19,7 +19,6 @@
     TYPE_CHECKING,
     Any,
     Callable,
-    Dict,
 )
 
 import numpy as np

From 282c48f3ebd57f76e5e373a48beb701f0759347c Mon Sep 17 00:00:00 2001
From: Andrea Albino <95371554+aalbino2@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:07:30 +0200
Subject: [PATCH 06/41] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>
Co-authored-by: Hampus Näsström <hampus.nasstrom@gmail.com>
---
 src/nomad_measurements/general/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
index 853e1803..621f7b0c 100644
--- a/src/nomad_measurements/general/__init__.py
+++ b/src/nomad_measurements/general/__init__.py
@@ -9,6 +9,6 @@ def load(self):
 
 
 schema = GeneralSchemaPackageEntryPoint(
-    name='GeneralSchema',
+    name='General Schema',
     description='Schema package defined using the new plugin mechanism.',
 )

From bc29a436c1cffb386dd3880d6e90d31526a987fb Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Wed, 21 Aug 2024 09:36:58 +0200
Subject: [PATCH 07/41] changed xrd parser folder

---
 src/nomad_measurements/xrd/__init__.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py
index a088a3d4..3ecf893f 100644
--- a/src/nomad_measurements/xrd/__init__.py
+++ b/src/nomad_measurements/xrd/__init__.py
@@ -27,3 +27,18 @@ def load(self):
     mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$',
     mainfile_mime_re='text/.*|application/zip',
 )
+
+
+class XRDParserEntryPoint(ParserEntryPoint):
+    def load(self):
+        from nomad_measurements.xrd.parser import XRDParser
+
+        return XRDParser(**self.dict())
+
+
+parser = XRDParserEntryPoint(
+    name='XRD Parser',
+    description='Parser defined using the new plugin mechanism.',
+    mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$',
+    mainfile_mime_re='text/.*|application/zip',
+)

From 581a77dbafc0e9d0092c5b5452407ce8aab7daa1 Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Wed, 21 Aug 2024 10:00:23 +0200
Subject: [PATCH 08/41] last fixes and descriptions

---
 MANIFEST.in                            | 1 -
 src/nomad_measurements/xrd/__init__.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index b5ccc2d3..e69de29b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +0,0 @@
-recursive-include * nomad_plugin.yaml
diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py
index 3ecf893f..86e95635 100644
--- a/src/nomad_measurements/xrd/__init__.py
+++ b/src/nomad_measurements/xrd/__init__.py
@@ -38,7 +38,7 @@ def load(self):
 
 parser = XRDParserEntryPoint(
     name='XRD Parser',
-    description='Parser defined using the new plugin mechanism.',
+    description='Parser for several kinds of raw files from XRD measurements.',
     mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$',
     mainfile_mime_re='text/.*|application/zip',
 )

From cdc749d1035b2ef9ee8c8b0d51e06fc4fa010ee9 Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Wed, 21 Aug 2024 10:13:34 +0200
Subject: [PATCH 09/41] description of general schema

---
 src/nomad_measurements/general/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
index 621f7b0c..bcfec234 100644
--- a/src/nomad_measurements/general/__init__.py
+++ b/src/nomad_measurements/general/__init__.py
@@ -10,5 +10,5 @@ def load(self):
 
 schema = GeneralSchemaPackageEntryPoint(
     name='General Schema',
-    description='Schema package defined using the new plugin mechanism.',
+    description='Schema package containing basic classes used around in the plugin.',
 )

From ce2a3640375d004f604decf1a691a43317e2fe81 Mon Sep 17 00:00:00 2001
From: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Date: Wed, 21 Aug 2024 16:08:10 +0200
Subject: [PATCH 10/41] changed general package into a module

---
 .../{general/schema.py => __init__.py}             |  0
 src/nomad_measurements/general/__init__.py         | 14 --------------
 2 files changed, 14 deletions(-)
 rename src/nomad_measurements/{general/schema.py => __init__.py} (100%)
 delete mode 100644 src/nomad_measurements/general/__init__.py

diff --git a/src/nomad_measurements/general/schema.py b/src/nomad_measurements/__init__.py
similarity index 100%
rename from src/nomad_measurements/general/schema.py
rename to src/nomad_measurements/__init__.py
diff --git a/src/nomad_measurements/general/__init__.py b/src/nomad_measurements/general/__init__.py
deleted file mode 100644
index bcfec234..00000000
--- a/src/nomad_measurements/general/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from nomad.config.models.plugins import SchemaPackageEntryPoint
-
-
-class GeneralSchemaPackageEntryPoint(SchemaPackageEntryPoint):
-    def load(self):
-        from nomad_measurements.general.schema import m_package
-
-        return m_package
-
-
-schema = GeneralSchemaPackageEntryPoint(
-    name='General Schema',
-    description='Schema package containing basic classes used around in the plugin.',
-)

From dfb04b94936b3abf12b1c008f9ca33f27da309f0 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 11 Jun 2024 15:55:06 +0200
Subject: [PATCH 11/41] Implement write nexus section based on the populated
 nomad archive

---
 src/nomad_measurements/xrd/schema.py | 167 +++++----------------------
 1 file changed, 30 insertions(+), 137 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index c6a2e3df..a5c7cb14 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -78,6 +78,10 @@
     from structlog.stdlib import (
         BoundLogger,
     )
+    import pint
+
+from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection
+from pynxtools import dataconverter
 
 from nomad.config import config
 
@@ -86,48 +90,6 @@
 m_package = SchemaPackage(aliases=['nomad_measurements.xrd.parser.parser'])
 
 
-def populate_nexus_subsection(**kwargs):
-    raise NotImplementedError
-
-
-def handle_nexus_subsection(
-    xrd_template: 'Template',
-    nexus_out: str,
-    archive: 'EntryArchive',
-    logger: 'BoundLogger',
-):
-    """
-    Function for populating the NeXus section from the xrd_template.
-
-    Args:
-        xrd_template (Template): The xrd data in a NeXus Template.
-        nexus_out (str): The name of the optional NeXus output file.
-        archive (EntryArchive): The archive containing the section.
-        logger (BoundLogger): A structlog logger.
-    """
-    nxdl_name = 'NXxrd_pan'
-    if nexus_out:
-        if not nexus_out.endswith('.nxs'):
-            nexus_out = nexus_out + '.nxs'
-        populate_nexus_subsection(
-            template=xrd_template,
-            app_def=nxdl_name,
-            archive=archive,
-            logger=logger,
-            output_file_path=nexus_out,
-            on_temp_file=False,
-        )
-    else:
-        populate_nexus_subsection(
-            template=xrd_template,
-            app_def=nxdl_name,
-            archive=archive,
-            logger=logger,
-            output_file_path=nexus_out,
-            on_temp_file=True,
-        )
-
-
 def calculate_two_theta_or_q(
     wavelength: 'pint.Quantity',
     q: 'pint.Quantity' = None,
@@ -878,7 +840,6 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
         label='X-Ray Diffraction (XRD)',
         a_eln=ELNAnnotation(
             lane_width='800px',
-            hide=['generate_nexus_file'],
         ),
         a_template={
             'measurement_identifiers': {},
@@ -996,108 +957,39 @@ def write_xrd_data(
         )
         merge_sections(self, xrd, logger)
 
-    def write_nx_xrd(
-        self,
-        xrd_dict: 'Template',
-        archive: 'EntryArchive',
-        logger: 'BoundLogger',
-    ) -> None:
+    def write_nx_section_and_create_file(
+        self, archive: 'EntryArchive', logger: 'BoundLogger'
+    ):
         """
-        Populate `ELNXRayDiffraction` section from a NeXus Template.
+        Uses the archive to generate the NeXus section and .nxs file.
 
         Args:
-            xrd_dict (Dict[str, Any]): A dictionary with the XRD data.
             archive (EntryArchive): The archive containing the section.
             logger (BoundLogger): A structlog logger.
         """
-        # TODO add the result section based on the scan_type
-        result = XRDResult(
-            intensity=xrd_dict.get(
-                '/ENTRY[entry]/2theta_plot/intensity',
-                None,
-            ),
-            two_theta=xrd_dict.get(
-                '/ENTRY[entry]/2theta_plot/two_theta',
-                None,
-            ),
-            omega=xrd_dict.get(
-                '/ENTRY[entry]/2theta_plot/omega',
-                None,
-            ),
-            chi=xrd_dict.get('/ENTRY[entry]/2theta_plot/chi', None),
-            phi=xrd_dict.get(
-                '/ENTRY[entry]/2theta_plot/phi',
-                None,
-            ),
-            scan_axis=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis',
-                None,
-            ),
-            integration_time=xrd_dict.get(
-                '/ENTRY[entry]/COLLECTION[collection]/count_time', None
-            ),
-        )
-        result.normalize(archive, logger)
-
-        source = XRayTubeSource(
-            xray_tube_material=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material',
-                None,
-            ),
-            kalpha_one=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one',
-                None,
-            ),
-            kalpha_two=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two',
-                None,
-            ),
-            ratio_kalphatwo_kalphaone=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
-                None,
-            ),
-            kbeta=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
-                None,
-            ),
-            xray_tube_voltage=xrd_dict.get(
-                'ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage',
-                None,
-            ),
-            xray_tube_current=xrd_dict.get(
-                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current',
-                None,
-            ),
-        )
-        source.normalize(archive, logger)
-
-        xrd_settings = XRDSettings(source=source)
-        xrd_settings.normalize(archive, logger)
-
-        sample = CompositeSystemReference(
-            lab_id=xrd_dict.get(
-                '/ENTRY[entry]/SAMPLE[sample]/sample_id',
-                None,
-            ),
-        )
-        sample.normalize(archive, logger)
-
-        xrd = ELNXRayDiffraction(
-            results=[result],
-            xrd_settings=xrd_settings,
-            samples=[sample],
+        nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan')
+        template = dataconverter.template.Template()
+        dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template)
+
+        template['/ENTRY[entry]/2theta_plot/intensity'] = archive.data.results[
+            0
+        ].intensity.magnitude
+        template['/ENTRY[entry]/2theta_plot/two_theta'] = archive.data.results[
+            0
+        ].two_theta.magnitude
+        template['/ENTRY[entry]/2theta_plot/two_theta/@units'] = str(
+            archive.data.results[0].two_theta.units
         )
-        merge_sections(self, xrd, logger)
+        archive_name = archive.metadata.mainfile.split('.')[0]
+        nexus_output = f'{archive_name}_output.nxs'
 
-        nexus_output = None
-        if self.generate_nexus_file:
-            archive_name = archive.metadata.mainfile.split('.')[0]
-            nexus_output = f'{archive_name}_output.nxs'
-        handle_nexus_subsection(
-            xrd_dict,
-            nexus_output,
-            archive,
-            logger,
+        populate_nexus_subsection(
+            template=template,
+            app_def='NXxrd_pan',
+            archive=archive,
+            logger=logger,
+            output_file_path=nexus_output,
+            on_temp_file=self.generate_nexus_file,
         )
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
@@ -1122,6 +1014,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         super().normalize(archive, logger)
         if not self.results:
             return
+        self.write_nx_section_and_create_file(archive, logger)
         self.figures = self.results[0].generate_plots(archive, logger)
 
 
From 78fe74a7f262e324f57c7f35efedeab6b708c21b Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 27 Aug 2024 11:41:26 +0200
Subject: [PATCH 12/41] Update path of populate_nexus_subsection

---
 src/nomad_measurements/xrd/schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index a5c7cb14..144a9ed7 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -80,7 +80,7 @@
     )
     import pint
 
-from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection
+from pynxtools.nomad.dataconverter import populate_nexus_subsection
 from pynxtools import dataconverter
 
 from nomad.config import config

From 380d21af0cdeed36f64e9f3462d031998d9df6d5 Mon Sep 17 00:00:00 2001
From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:57:18 +0200
Subject: [PATCH 13/41] app def missing. (#108)

* Implement write nexus section based on the populated nomad archive

* app def missing.

* mapping nomad_measurement.

* All concepts are connected; creates nexus file and subsection.

* adding links in hdf5 file.

* Remove the nxs file.

* back to the previous design.

* Include pynxtools plugins in nomad.yaml and extend dependencies including pynxtools and pynxtools-xrd.

* PR review correction.

* Remove the entry_type overwrite.

* Remove comments.

* Replace __str__ function.

* RUFF

* Update pyproject.toml

Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>

* Update src/nomad_measurements/xrd/schema.py

Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>

* Update src/nomad_measurements/xrd/nx.py

* Replace Try-block.

---------

Co-authored-by: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>
---
 src/nomad_measurements/xrd/nx.py     | 182 +++++++++++++++++++++++++++
 src/nomad_measurements/xrd/schema.py |  47 ++-----
 2 files changed, 189 insertions(+), 40 deletions(-)
 create mode 100644 src/nomad_measurements/xrd/nx.py

diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
new file mode 100644
index 00000000..e1b41fcf
--- /dev/null
+++ b/src/nomad_measurements/xrd/nx.py
@@ -0,0 +1,182 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import TYPE_CHECKING
+
+from pynxtools import dataconverter
+from pynxtools.nomad.dataconverter import populate_nexus_subsection
+
+if TYPE_CHECKING:
+    from nomad.datamodel.datamodel import EntryArchive
+    from structlog.stdlib import (
+        BoundLogger,
+    )
+
+
+def walk_through_object(parent_obj, attr_chain, default=None):
+    """
+    Walk though the object until reach the leaf.
+
+    Args:
+        parent_obj: This is a python obj.
+        attr_chain: Dot separated obj chain.
+        default: A value to be returned by default, if not data is found.
+    """
+    expected_parts = 2
+    if isinstance(attr_chain, str):
+        parts = attr_chain.split('.', 1)
+
+        if len(parts) == expected_parts:
+            child_nm, rest_part = parts
+            if '[' in child_nm:
+                child_nm, index = child_nm.split('[')
+                index = int(index[:-1])
+                child_obj = getattr(parent_obj, child_nm)[index]
+            else:
+                child_obj = getattr(parent_obj, child_nm)
+            return walk_through_object(child_obj, rest_part, default=default)
+        else:
+            return getattr(parent_obj, attr_chain, default)
+
+
+def connect_concepts(template, archive: 'EntryArchive', scan_type: str):  # noqa: PLR0912
+    """
+    Connect the concepts between `ELNXrayDiffraction` and `NXxrd_pan` schema.
+
+    Args:
+        template (Template): The pynxtools template, a inherited class from python dict.
+        archive (EntryArchive): Nomad archive contains secttions, subsections and
+            quantities.
+        scan_type (str): Name of the scan type such as line and RSM.
+    """
+
+    # General concepts
+    # ruff: noqa: E501
+    concept_map = {
+        '/ENTRY[entry]/method': 'archive.data.method',
+        '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
+        '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity.magnitude',
+        '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta.magnitude',
+        '/ENTRY[entry]/experiment_result/two_theta/@units': 'archive.data.results[0].two_theta.units',
+        '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega.magnitude',
+        '/ENTRY[entry]/experiment_result/omega/@units': 'archive.data.results[0].omega.units',
+        '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi.magnitude',
+        '/ENTRY[entry]/experiment_result/chi/@units': 'archive.data.results[0].chi.units',
+        '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi.magnitude',
+        '/ENTRY[entry]/experiment_result/phi/@units': 'archive.data.results[0].phi.units',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis',
+        '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time.magnitude',
+        'line': '',  # For future implementation
+        'rsm': {
+            '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel',
+            '/ENTRY[entry]/experiment_result/q_parallel/@units': 'archive.data.results[0].q_parallel.units',
+            '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular.magnitude',
+            '/ENTRY[entry]/experiment_result/q_perpendicular/@units': 'archive.data.results[0].q_perpendicular.units',
+            '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm.magnitude',
+            '/ENTRY[entry]/experiment_result/q_norm/@units': 'archive.data.results[0].q_norm.units',
+        },
+        # Source
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current.magnitude',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current/@units': 'archive.data.xrd_settings.source.xray_tube_current.units',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage.magnitude',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage/@units': 'archive.data.xrd_settings.source.xray_tube_voltage.units',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one.magnitude',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one/@units': 'archive.data.xrd_settings.source.kalpha_one.units',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two.magnitude',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two/@units': 'archive.data.xrd_settings.source.kalpha_two.units',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta.magnitude',
+        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta/@units': 'archive.data.xrd_settings.source.kbeta.units',
+    }
+
+    for key, archive_concept in concept_map.items():
+        if isinstance(archive_concept, dict):
+            if key == scan_type:
+                for sub_key, sub_archive_concept in archive_concept.items():
+                    _, arch_attr = sub_archive_concept.split('.', 1)
+                    value = None
+                    try:
+                        value = walk_through_object(archive, arch_attr)
+                    except (AttributeError, IndexError, KeyError, ValueError):
+                        pass
+                    finally:
+                        if value is not None:
+                            template[sub_key] = (
+                                str(value) if sub_key.endswith('units') else value
+                            )
+            else:
+                continue
+        elif archive_concept:
+            _, arch_attr = archive_concept.split('.', 1)
+            value = None
+            try:
+                value = walk_through_object(archive, arch_attr)
+            # Use multiple excepts to avoid catching all exceptions
+            except (AttributeError, IndexError, KeyError, ValueError):
+                pass
+            finally:
+                if value is not None:
+                    template[key] = str(value) if key.endswith('units') else value
+
+    template['/ENTRY[entry]/definition'] = 'NXxrd_pan'
+
+    # Links to the data and concepts
+    template['/ENTRY[entry]/@default'] = 'experiment_result'
+    template['/ENTRY[entry]/experiment_result/@signal'] = 'intensity'
+    template['/ENTRY[entry]/experiment_result/@axes'] = 'two_theta'
+    template['/ENTRY[entry]/q_data/q'] = {
+        'link': '/ENTRY[entry]/experiment_result/q_norm'
+    }
+    template['/ENTRY[entry]/q_data/intensity'] = {
+        'link': '/ENTRY[entry]/experiment_result/intensity'
+    }
+    template['/ENTRY[entry]/q_data/q_parallel'] = {
+        'link': '/ENTRY[entry]/experiment_result/q_parallel'
+    }
+    template['/ENTRY[entry]/q_data/q_perpendicular'] = {
+        'link': '/ENTRY[entry]/experiment_result/q_perpendicular'
+    }
+
+
+def write_nx_section_and_create_file(
+    archive: 'EntryArchive', logger: 'BoundLogger', scan_type: str = 'line'
+):
+    """
+    Uses the archive to generate the NeXus section and .nxs file.
+
+    Args:
+        archive (EntryArchive): The archive containing the section.
+        logger (BoundLogger): A structlog logger.
+        generate_nexus_file (boolean): If True, the function will generate a .nxs file.
+        nxs_as_entry (boolean): If True, the function will generate a .nxs file
+                as a nomad entry.
+    """
+    nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan')
+    template = dataconverter.template.Template()
+    dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template)
+    connect_concepts(template, archive, scan_type=scan_type)
+    archive_name = archive.metadata.mainfile.split('.')[0]
+    nexus_output = f'{archive_name}.nxs'
+
+    populate_nexus_subsection(
+        template=template,
+        app_def='NXxrd_pan',
+        archive=archive,
+        logger=logger,
+        output_file_path=nexus_output,
+    )
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 144a9ed7..2923a17e 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -68,20 +68,17 @@
     NOMADMeasurementsCategory,
 )
 from nomad_measurements.utils import get_bounding_range_2d, merge_sections
+from nomad_measurements.xrd.nx import write_nx_section_and_create_file
 
 if TYPE_CHECKING:
     import pint
     from nomad.datamodel.datamodel import (
         EntryArchive,
     )
-    from pynxtools.dataconverter.template import Template
     from structlog.stdlib import (
         BoundLogger,
     )
-    import pint
 
-from pynxtools.nomad.dataconverter import populate_nexus_subsection
-from pynxtools import dataconverter
 
 from nomad.config import config
 
@@ -862,6 +859,7 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
     generate_nexus_file = Quantity(
         type=bool,
         description='Whether or not to generate a NeXus output file (if possible).',
+        default=True,
         a_eln=ELNAnnotation(
             component=ELNComponentEnum.BoolEditQuantity,
             label='Generate NeXus file',
@@ -957,41 +955,6 @@ def write_xrd_data(
         )
         merge_sections(self, xrd, logger)
 
-    def write_nx_section_and_create_file(
-        self, archive: 'EntryArchive', logger: 'BoundLogger'
-    ):
-        """
-        Uses the archive to generate the NeXus section and .nxs file.
-
-        Args:
-            archive (EntryArchive): The archive containing the section.
-            logger (BoundLogger): A structlog logger.
-        """
-        nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan')
-        template = dataconverter.template.Template()
-        dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template)
-
-        template['/ENTRY[entry]/2theta_plot/intensity'] = archive.data.results[
-            0
-        ].intensity.magnitude
-        template['/ENTRY[entry]/2theta_plot/two_theta'] = archive.data.results[
-            0
-        ].two_theta.magnitude
-        template['/ENTRY[entry]/2theta_plot/two_theta/@units'] = str(
-            archive.data.results[0].two_theta.units
-        )
-        archive_name = archive.metadata.mainfile.split('.')[0]
-        nexus_output = f'{archive_name}_output.nxs'
-
-        populate_nexus_subsection(
-            template=template,
-            app_def='NXxrd_pan',
-            archive=archive,
-            logger=logger,
-            output_file_path=nexus_output,
-            on_temp_file=self.generate_nexus_file,
-        )
-
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         """
         The normalize function of the `ELNXRayDiffraction` section.
@@ -1014,7 +977,11 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         super().normalize(archive, logger)
         if not self.results:
             return
-        self.write_nx_section_and_create_file(archive, logger)
+
+        scan_type = xrd_dict.get('metadata', {}).get('scan_type', None)
+        if self.generate_nexus_file and self.data_file is not None:
+            write_nx_section_and_create_file(archive, logger, scan_type=scan_type)
+
         self.figures = self.results[0].generate_plots(archive, logger)
 
 
From b3b90f357d410af4c2a98a54708b0ab67c772d88 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 29 Aug 2024 15:28:16 +0200
Subject: [PATCH 14/41] Run Python test action for all PR

---
 .github/workflows/python-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
index ece0b411..49d80603 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-test.yml
@@ -7,7 +7,7 @@ on:
   push:
     branches: [ "main" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "*" ]
 
 permissions:
   contents: read

From 266f2516c65df4eb1575820b37bdf7b00383703e Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 3 Sep 2024 10:51:14 +0200
Subject: [PATCH 15/41] Fix for rebasing issues

---
 src/nomad_measurements/xrd/__init__.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/src/nomad_measurements/xrd/__init__.py b/src/nomad_measurements/xrd/__init__.py
index 86e95635..a088a3d4 100644
--- a/src/nomad_measurements/xrd/__init__.py
+++ b/src/nomad_measurements/xrd/__init__.py
@@ -14,21 +14,6 @@ def load(self):
 )
 
 
-class XRDParserEntryPoint(ParserEntryPoint):
-    def load(self):
-        from nomad_measurements.xrd.parser import XRDParser
-
-        return XRDParser(**self.dict())
-
-
-parser = XRDParserEntryPoint(
-    name='XRD Parser',
-    description='Parser for several kinds of raw files from XRD measurements.',
-    mainfile_name_re=r'^.*\.xrdml$|^.*\.rasx$|^.*\.brml$',
-    mainfile_mime_re='text/.*|application/zip',
-)
-
-
 class XRDParserEntryPoint(ParserEntryPoint):
     def load(self):
         from nomad_measurements.xrd.parser import XRDParser

From f82d4a7810e872544f2356cb518b7632c0f15197 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 14 Jan 2025 10:58:38 +0100
Subject: [PATCH 16/41] Use hdf5 references for arrays (#118)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* updated plugin structure

* added pynxtools dependency

* Apply suggestions from code review

Co-authored-by: Sarthak Kapoor <57119427+ka-sarthak@users.noreply.github.com>
Co-authored-by: Hampus Näsström <hampus.nasstrom@gmail.com>

* Add sections for RSM and 1D which uses HDF5 references

* Abstract out data interaction using setter and getter; allows to use same methods for classes with hdf5 refs

* Use arrays, not references, in the `archive.results` section

* Lock the state for using nexus file and corresponding references

* Populate results without references

* Make a general reader for raw files

* Remove nexus flags

* Add quantity for auxiliary file

* Fix rebase

* Make integration_time as hdf5reference

* Reset results (refactor)

* Add backward compatibility

* Refactor reader

* add missing imports

* AttrDict class

* Make concept map global

* Add function to remove nexus annotations in concept map

* Move try block inside walk_through_object

* Fix imports

* Add methods for generating hdf5 file

* Rename auxiliary file

* Expect aux file to be .nxs in the beginning

* Add attributes for hdf5: data_dict, dataset_paths

* Method for adding a quantity to hdf5_data_dict

* Abstract out methods for creating files based on hdf5_data_dict

* Add dataset_paths for nexus

* Some reverting back

* Minor fixes

* Refactor populate_hdf5_data_dict: store a reference to be made later

* Handle shift from nxs to hdf5

* Set hdf5 references after aux file is created

* Cleaning

* Fixing

* Redefine result sections instead of extending

* Remove plotly plots from ELN

* Read util for hdf5 ref

* Fixing

* Move hdf5 handling into a util class

* Refactor instance variables

* Reset data dicts and reference after each writing

* Fixing

* Overwrite dataset if it already exists

* Refactor add_dataset

* Reorganize and docstrings

* Rename variable

* Add read_dataset method

* Cleaning

* Adapting schema with hdf5 handler

* Comments, minor refactoring

* Fixing; add `hdf5_handler` as an attribute for archive

* Reorganization

* Fixing

* Refactoring

* Cleaning

* Try block for using hdf5 handler: don't fail early, as later normalization steps will have the handler!

* Extract units from dataset attrs when reading

* Fixing

* Linting

* Make archive_path optional in add_dataset

* Rename class

* attrs for add_dataset; use it for units

* Add add_attribute method

* Refactor add_attribute

* Add plot attributes: 1D

* Refactor hdf5 states

* Add back plotly figures

* rename auxiliary file name if changed by handler

* Add referenced plots

* Allow hard link using internal reference

* Add sections for plots

* Comment out validation

* Add archive paths for the plot subsections

* Add back validation with flag

* Use nexus flag

* Add interpolated intensity data into h5 for qspace plots

* Use prefix to reduce len of string

* Store regularized linespace of q vectors; revise descriptions

* Remove plotly plots

* Bring plots to overview

* Fix tests

* Linting; remove attr arg from add_dataset

* Review: move none check into method

* Review: use 'with' for opening h5 file

* Review: make internal states as private vars

* Add pydantic basemodel for dataset

* Use data from variables if available for reading

* Review: remove lazy arg

* Move DatasetModel outside Handler class

* Remove None from get, as it is already a default

* Merge if conditions

---------

Co-authored-by: Andrea Albino <andrea.albino@physik.hu-berlin.de>
Co-authored-by: Andrea Albino <95371554+aalbino2@users.noreply.github.com>
Co-authored-by: Hampus Näsström <hampus.nasstrom@gmail.com>
---
 src/nomad_measurements/utils.py      | 331 ++++++++++++++
 src/nomad_measurements/xrd/nx.py     | 185 +-------
 src/nomad_measurements/xrd/schema.py | 632 ++++++++++++++++++++-------
 3 files changed, 827 insertions(+), 321 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 250e030f..876d25b4 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -15,12 +15,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import collections
 import os.path
+import re
 from typing import (
     TYPE_CHECKING,
+    Any,
+    Optional,
 )
 
+import h5py
 import numpy as np
+import pint
+from nomad.datamodel.hdf5 import HDF5Reference
+from nomad.units import ureg
+from pydantic import BaseModel, Field
 
 if TYPE_CHECKING:
     from nomad.datamodel.data import (
@@ -166,3 +175,325 @@ def get_bounding_range_2d(ax1, ax2):
         ]
 
     return ax1_range, ax2_range
+
+
+class DatasetModel(BaseModel):
+    """
+    Pydantic model for the dataset to be stored in the HDF5 file.
+    """
+
+    data: Any = Field(description='The data to be stored in the HDF5 file.')
+    archive_path: Optional[str] = Field(
+        None, description='The path of the quantity in the NOMAD archive.'
+    )
+    internal_reference: Optional[bool] = Field(
+        False,
+        description='If True, an internal reference is set to an existing HDF5 '
+        'dataset.',
+    )
+
+
+class HDF5Handler:
+    """
+    Class for handling the creation of auxiliary files to store big data arrays outside
+    the main archive file (e.g. HDF5, NeXus).
+    """
+
+    def __init__(
+        self,
+        filename: str,
+        archive: 'EntryArchive',
+        logger: 'BoundLogger',
+        valid_dataset_paths: list = None,
+        nexus: bool = False,
+    ):
+        """
+        Initialize the handler.
+
+        Args:
+            filename (str): The name of the auxiliary file.
+            archive (EntryArchive): The NOMAD archive.
+            logger (BoundLogger): A structlog logger.
+            valid_dataset_paths (list): The list of valid dataset paths.
+            nexus (bool): If True, the file is created as a NeXus file.
+        """
+        if not filename.endswith(('.nxs', '.h5')):
+            raise ValueError('Only .h5 or .nxs files are supported.')
+
+        self.data_file = filename
+        self.archive = archive
+        self.logger = logger
+        self.valid_dataset_paths = []
+        if valid_dataset_paths:
+            self.valid_dataset_paths = valid_dataset_paths
+        self.nexus = nexus
+
+        self._hdf5_datasets = collections.OrderedDict()
+        self._hdf5_attributes = collections.OrderedDict()
+
+    def add_dataset(
+        self,
+        path: str,
+        params: dict,
+        validate_path: bool = True,
+    ):
+        """
+        Add a dataset to the HDF5 file. The dataset is written lazily to the file
+        when `write_file` method is called. The `path` is validated against the
+        `valid_dataset_paths` if provided before adding the data.
+
+        `params` should be a dictionary containing `data`. Optionally,
+        it can also contain `archive_path` and `internal_reference`:
+        {
+            'data': Any,
+            'archive_path': str,
+            'internal_reference': bool,
+        }
+
+        Args:
+            path (str): The dataset path to be used in the HDF5 file.
+            params (dict): The dataset parameters.
+            validate_path (bool): If True, the dataset path is validated.
+        """
+        if not params:
+            self.logger.warning('Dataset `params` must be provided.')
+            return
+
+        dataset = DatasetModel(
+            **params,
+        )
+        if (
+            validate_path
+            and self.valid_dataset_paths
+            and path not in self.valid_dataset_paths
+        ):
+            self.logger.warning(f'Invalid dataset path "{path}".')
+            return
+
+        # handle the pint.Quantity and add data
+        if isinstance(dataset.data, pint.Quantity):
+            self.add_attribute(
+                path=path,
+                params=dict(
+                    units=str(dataset.data.units),
+                ),
+            )
+            dataset.data = dataset.data.magnitude
+
+        self._hdf5_datasets[path] = dataset
+
+    def add_attribute(
+        self,
+        path: str,
+        params: dict,
+    ):
+        """
+        Add an attribute to the dataset or group at the given path. The attribute is
+        written lazily to the file when `write_file` method is called.
+
+        Args:
+            path (str): The dataset or group path in the HDF5 file.
+            params (dict): The attributes to be added.
+        """
+        if not params:
+            self.logger.warning('Attribute `params` must be provided.')
+            return
+        self._hdf5_attributes[path] = params
+
+    def read_dataset(self, path: str):
+        """
+        Returns the dataset at the given path. If the quantity has `units` as an
+        attribute, tries to returns a `pint.Quantity`.
+        If the dataset available in the `self._hdf5_datasets`, it is returned directly.
+
+        Args:
+            path (str): The dataset path in the HDF5 file.
+        """
+        if path is None:
+            return
+        file_path, dataset_path = path.split('#')
+
+        # find path in the instance variables
+        value = None
+        if dataset_path in self._hdf5_datasets:
+            value = self._hdf5_datasets[dataset_path].data
+            if dataset_path in self._hdf5_attributes:
+                units = self._hdf5_attributes[dataset_path].get('units')
+                if units:
+                    value *= ureg(units)
+            return value
+
+        file_name = file_path.rsplit('/raw/', 1)[1]
+        with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5:
+            if dataset_path not in h5:
+                self.logger.warning(f'Dataset "{dataset_path}" not found.')
+            else:
+                value = h5[dataset_path][...]
+                try:
+                    units = h5[dataset_path].attrs['units']
+                    value *= ureg(units)
+                except KeyError:
+                    pass
+        return value
+
+    def write_file(self):
+        """
+        Method for creating an auxiliary file to store big data arrays outside the
+        main archive file (e.g. HDF5, NeXus).
+        """
+        if self.nexus:
+            try:
+                self._write_nx_file()
+            except Exception as e:
+                self.nexus = False
+                self.logger.warning(
+                    f'Encountered "{e}" error while creating nexus file. '
+                    'Creating h5 file instead.'
+                )
+                self._write_hdf5_file()
+        else:
+            self._write_hdf5_file()
+
+    def _write_nx_file(self):
+        """
+        Method for creating a NeXus file. Additional data from the archive is added
+        to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus
+        view of the data in addition to storing array data.
+        """
+        if self.data_file.endswith('.h5'):
+            self.data_file = self.data_file.replace('.h5', '.nxs')
+        raise NotImplementedError('Method `write_nx_file` is not implemented.')
+        # TODO add archive data to `hdf5_data_dict` before creating the nexus file. Use
+        # `populate_hdf5_data_dict` method for each quantity that is needed in .nxs
+        # file. Create a NeXus file with the data in `hdf5_data_dict`.
+        # One issue here is as we populate the `hdf5_data_dict` with the archive data,
+        # we will always have to over write the nexus file
+
+    def _write_hdf5_file(self):  # noqa: PLR0912
+        """
+        Method for creating an HDF5 file.
+        """
+        if self.data_file.endswith('.nxs'):
+            self.data_file = self.data_file.replace('.nxs', '.h5')
+        if not self._hdf5_datasets and not self._hdf5_attributes:
+            return
+        # remove the nexus annotations from the dataset paths if any
+        tmp_dict = {}
+        for key, value in self._hdf5_datasets.items():
+            new_key = self._remove_nexus_annotations(key)
+            tmp_dict[new_key] = value
+        self._hdf5_datasets = tmp_dict
+        tmp_dict = {}
+        for key, value in self._hdf5_attributes.items():
+            tmp_dict[self._remove_nexus_annotations(key)] = value
+        self._hdf5_attributes = tmp_dict
+
+        # create the HDF5 file
+        mode = 'r+b' if self.archive.m_context.raw_path_exists(self.data_file) else 'wb'
+        with h5py.File(
+            self.archive.m_context.raw_file(self.data_file, mode), 'a'
+        ) as h5:
+            for key, value in self._hdf5_datasets.items():
+                if value.data is None:
+                    self.logger.warning(f'No data found for "{key}". Skipping.')
+                    continue
+                elif value.internal_reference:
+                    # resolve the internal reference
+                    try:
+                        data = h5[self._remove_nexus_annotations(value.data)]
+                    except KeyError:
+                        self.logger.warning(
+                            f'Internal reference "{value.data}" not found. Skipping.'
+                        )
+                        continue
+                else:
+                    data = value.data
+
+                group_name, dataset_name = key.rsplit('/', 1)
+                group = h5.require_group(group_name)
+
+                if key in h5:
+                    group[dataset_name][...] = data
+                else:
+                    group.create_dataset(
+                        name=dataset_name,
+                        data=data,
+                    )
+                self._set_hdf5_reference(
+                    self.archive,
+                    value.archive_path,
+                    f'/uploads/{self.archive.m_context.upload_id}/raw'
+                    f'/{self.data_file}#{key}',
+                )
+            for key, value in self._hdf5_attributes.items():
+                if key in h5:
+                    h5[key].attrs.update(value)
+                else:
+                    self.logger.warning(f'Path "{key}" not found to add attribute.')
+
+        # reset hdf5 datasets and atttributes
+        self._hdf5_datasets = collections.OrderedDict()
+        self._hdf5_attributes = collections.OrderedDict()
+
+    @staticmethod
+    def _remove_nexus_annotations(path: str) -> str:
+        """
+        Remove the nexus related annotations from the dataset path.
+        For e.g.,
+        '/ENTRY[entry]/experiment_result/intensity' ->
+        '/entry/experiment_result/intensity'
+
+        Args:
+            path (str): The dataset path with nexus annotations.
+
+        Returns:
+            str: The dataset path without nexus annotations.
+        """
+        if not path:
+            return path
+
+        pattern = r'.*\[.*\]'
+        new_path = ''
+        for part in path.split('/')[1:]:
+            if re.match(pattern, part):
+                new_path += '/' + part.split('[')[0].strip().lower()
+            else:
+                new_path += '/' + part
+        new_path = new_path.replace('.nxs', '.h5')
+        return new_path
+
+    @staticmethod
+    def _set_hdf5_reference(
+        section: 'ArchiveSection' = None, path: str = None, ref: str = None
+    ):
+        """
+        Method for setting a HDF5Reference quantity in a section. It can handle
+        nested quantities and repeatable sections, provided that the quantity itself
+        is of type `HDF5Reference`.
+        For example, one can set the reference for a quantity path like
+        `data.results[0].intensity`.
+
+        Args:
+            section (Section): The NOMAD section containing the quantity.
+            path (str): The path to the quantity.
+            ref (str): The reference to the HDF5 dataset.
+        """
+        # TODO handle the case when section in the path is not initialized
+
+        if not section or not path or not ref:
+            return
+        attr = section
+        path = path.split('.')
+        quantity_name = path.pop()
+
+        for subpath in path:
+            if re.match(r'.*\[.*\]', subpath):
+                index = int(subpath.split('[')[1].split(']')[0])
+                attr = attr.m_get(subpath.split('[')[0], index=index)
+            else:
+                attr = attr.m_get(subpath)
+
+        if isinstance(
+            attr.m_get_quantity_definition(quantity_name).type, HDF5Reference
+        ):
+            attr.m_set(quantity_name, ref)
diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
index e1b41fcf..db73af2f 100644
--- a/src/nomad_measurements/xrd/nx.py
+++ b/src/nomad_measurements/xrd/nx.py
@@ -15,168 +15,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import TYPE_CHECKING
 
-from pynxtools import dataconverter
-from pynxtools.nomad.dataconverter import populate_nexus_subsection
-
-if TYPE_CHECKING:
-    from nomad.datamodel.datamodel import EntryArchive
-    from structlog.stdlib import (
-        BoundLogger,
-    )
-
-
-def walk_through_object(parent_obj, attr_chain, default=None):
-    """
-    Walk though the object until reach the leaf.
-
-    Args:
-        parent_obj: This is a python obj.
-        attr_chain: Dot separated obj chain.
-        default: A value to be returned by default, if not data is found.
-    """
-    expected_parts = 2
-    if isinstance(attr_chain, str):
-        parts = attr_chain.split('.', 1)
-
-        if len(parts) == expected_parts:
-            child_nm, rest_part = parts
-            if '[' in child_nm:
-                child_nm, index = child_nm.split('[')
-                index = int(index[:-1])
-                child_obj = getattr(parent_obj, child_nm)[index]
-            else:
-                child_obj = getattr(parent_obj, child_nm)
-            return walk_through_object(child_obj, rest_part, default=default)
-        else:
-            return getattr(parent_obj, attr_chain, default)
-
-
-def connect_concepts(template, archive: 'EntryArchive', scan_type: str):  # noqa: PLR0912
-    """
-    Connect the concepts between `ELNXrayDiffraction` and `NXxrd_pan` schema.
-
-    Args:
-        template (Template): The pynxtools template, a inherited class from python dict.
-        archive (EntryArchive): Nomad archive contains secttions, subsections and
-            quantities.
-        scan_type (str): Name of the scan type such as line and RSM.
-    """
-
-    # General concepts
-    # ruff: noqa: E501
-    concept_map = {
-        '/ENTRY[entry]/method': 'archive.data.method',
-        '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
-        '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity.magnitude',
-        '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta.magnitude',
-        '/ENTRY[entry]/experiment_result/two_theta/@units': 'archive.data.results[0].two_theta.units',
-        '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega.magnitude',
-        '/ENTRY[entry]/experiment_result/omega/@units': 'archive.data.results[0].omega.units',
-        '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi.magnitude',
-        '/ENTRY[entry]/experiment_result/chi/@units': 'archive.data.results[0].chi.units',
-        '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi.magnitude',
-        '/ENTRY[entry]/experiment_result/phi/@units': 'archive.data.results[0].phi.units',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis',
-        '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time.magnitude',
-        'line': '',  # For future implementation
-        'rsm': {
-            '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel',
-            '/ENTRY[entry]/experiment_result/q_parallel/@units': 'archive.data.results[0].q_parallel.units',
-            '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular.magnitude',
-            '/ENTRY[entry]/experiment_result/q_perpendicular/@units': 'archive.data.results[0].q_perpendicular.units',
-            '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm.magnitude',
-            '/ENTRY[entry]/experiment_result/q_norm/@units': 'archive.data.results[0].q_norm.units',
-        },
-        # Source
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current.magnitude',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current/@units': 'archive.data.xrd_settings.source.xray_tube_current.units',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage.magnitude',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage/@units': 'archive.data.xrd_settings.source.xray_tube_voltage.units',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one.magnitude',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one/@units': 'archive.data.xrd_settings.source.kalpha_one.units',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two.magnitude',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two/@units': 'archive.data.xrd_settings.source.kalpha_two.units',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta.magnitude',
-        '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta/@units': 'archive.data.xrd_settings.source.kbeta.units',
-    }
-
-    for key, archive_concept in concept_map.items():
-        if isinstance(archive_concept, dict):
-            if key == scan_type:
-                for sub_key, sub_archive_concept in archive_concept.items():
-                    _, arch_attr = sub_archive_concept.split('.', 1)
-                    value = None
-                    try:
-                        value = walk_through_object(archive, arch_attr)
-                    except (AttributeError, IndexError, KeyError, ValueError):
-                        pass
-                    finally:
-                        if value is not None:
-                            template[sub_key] = (
-                                str(value) if sub_key.endswith('units') else value
-                            )
-            else:
-                continue
-        elif archive_concept:
-            _, arch_attr = archive_concept.split('.', 1)
-            value = None
-            try:
-                value = walk_through_object(archive, arch_attr)
-            # Use multiple excepts to avoid catching all exceptions
-            except (AttributeError, IndexError, KeyError, ValueError):
-                pass
-            finally:
-                if value is not None:
-                    template[key] = str(value) if key.endswith('units') else value
-
-    template['/ENTRY[entry]/definition'] = 'NXxrd_pan'
-
-    # Links to the data and concepts
-    template['/ENTRY[entry]/@default'] = 'experiment_result'
-    template['/ENTRY[entry]/experiment_result/@signal'] = 'intensity'
-    template['/ENTRY[entry]/experiment_result/@axes'] = 'two_theta'
-    template['/ENTRY[entry]/q_data/q'] = {
-        'link': '/ENTRY[entry]/experiment_result/q_norm'
-    }
-    template['/ENTRY[entry]/q_data/intensity'] = {
-        'link': '/ENTRY[entry]/experiment_result/intensity'
-    }
-    template['/ENTRY[entry]/q_data/q_parallel'] = {
-        'link': '/ENTRY[entry]/experiment_result/q_parallel'
-    }
-    template['/ENTRY[entry]/q_data/q_perpendicular'] = {
-        'link': '/ENTRY[entry]/experiment_result/q_perpendicular'
-    }
-
-
-def write_nx_section_and_create_file(
-    archive: 'EntryArchive', logger: 'BoundLogger', scan_type: str = 'line'
-):
-    """
-    Uses the archive to generate the NeXus section and .nxs file.
-
-    Args:
-        archive (EntryArchive): The archive containing the section.
-        logger (BoundLogger): A structlog logger.
-        generate_nexus_file (boolean): If True, the function will generate a .nxs file.
-        nxs_as_entry (boolean): If True, the function will generate a .nxs file
-                as a nomad entry.
-    """
-    nxdl_root, _ = dataconverter.helpers.get_nxdl_root_and_path('NXxrd_pan')
-    template = dataconverter.template.Template()
-    dataconverter.helpers.generate_template_from_nxdl(nxdl_root, template)
-    connect_concepts(template, archive, scan_type=scan_type)
-    archive_name = archive.metadata.mainfile.split('.')[0]
-    nexus_output = f'{archive_name}.nxs'
-
-    populate_nexus_subsection(
-        template=template,
-        app_def='NXxrd_pan',
-        archive=archive,
-        logger=logger,
-        output_file_path=nexus_output,
-    )
+NEXUS_DATASET_PATHS = [
+    '/ENTRY[entry]/experiment_result/intensity',
+    '/ENTRY[entry]/experiment_result/two_theta',
+    '/ENTRY[entry]/experiment_result/omega',
+    '/ENTRY[entry]/experiment_result/chi',
+    '/ENTRY[entry]/experiment_result/phi',
+    '/ENTRY[entry]/experiment_config/count_time',
+    '/ENTRY[entry]/experiment_result/q_norm',
+    '/ENTRY[entry]/experiment_result/q_parallel',
+    '/ENTRY[entry]/experiment_result/q_perpendicular',
+    '/ENTRY[entry]/method',
+    '/ENTRY[entry]/measurement_type',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
+]
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 2923a17e..80ec1dc3 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -22,19 +22,25 @@
 )
 
 import numpy as np
+import pint
 import plotly.express as px
 from fairmat_readers_xrd import (
     read_bruker_brml,
     read_panalytical_xrdml,
     read_rigaku_rasx,
 )
+from nomad.config import config
 from nomad.datamodel.data import (
     ArchiveSection,
     EntryData,
 )
+from nomad.datamodel.hdf5 import (
+    HDF5Reference,
+)
 from nomad.datamodel.metainfo.annotations import (
     ELNAnnotation,
     ELNComponentEnum,
+    H5WebAnnotation,
 )
 from nomad.datamodel.metainfo.basesections import (
     CompositeSystemReference,
@@ -42,10 +48,7 @@
     MeasurementResult,
     ReadableIdentifiers,
 )
-from nomad.datamodel.metainfo.plot import (
-    PlotlyFigure,
-    PlotSection,
-)
+from nomad.datamodel.metainfo.plot import PlotlyFigure
 from nomad.datamodel.results import (
     DiffractionPattern,
     MeasurementMethod,
@@ -67,11 +70,14 @@
 from nomad_measurements.general import (
     NOMADMeasurementsCategory,
 )
-from nomad_measurements.utils import get_bounding_range_2d, merge_sections
-from nomad_measurements.xrd.nx import write_nx_section_and_create_file
+from nomad_measurements.utils import (
+    HDF5Handler,
+    get_bounding_range_2d,
+    merge_sections,
+)
+from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS
 
 if TYPE_CHECKING:
-    import pint
     from nomad.datamodel.datamodel import (
         EntryArchive,
     )
@@ -80,18 +86,16 @@
     )
 
 
-from nomad.config import config
-
 configuration = config.get_plugin_entry_point('nomad_measurements.xrd:schema')
 
 m_package = SchemaPackage(aliases=['nomad_measurements.xrd.parser.parser'])
 
 
 def calculate_two_theta_or_q(
-    wavelength: 'pint.Quantity',
-    q: 'pint.Quantity' = None,
-    two_theta: 'pint.Quantity' = None,
-) -> tuple['pint.Quantity', 'pint.Quantity']:
+    wavelength: pint.Quantity,
+    q: pint.Quantity = None,
+    two_theta: pint.Quantity = None,
+) -> tuple[pint.Quantity, pint.Quantity]:
     """
     Calculate the two-theta array from the scattering vector (q) or vice-versa,
     given the wavelength of the X-ray source.
@@ -113,10 +117,10 @@ def calculate_two_theta_or_q(
     return q, two_theta
 
 
-def calculate_q_vectors_RSM(
-    wavelength: 'pint.Quantity',
-    two_theta: 'pint.Quantity',
-    omega: 'pint.Quantity',
+def calculate_q_vectors_rsm(
+    wavelength: pint.Quantity,
+    two_theta: pint.Quantity,
+    omega: pint.Quantity,
 ):
     """
     Calculate the q-vectors for RSM scans in coplanar configuration.
@@ -265,6 +269,205 @@ class XRDSettings(ArchiveSection):
     source = SubSection(section_def=XRayTubeSource)
 
 
+class XRDResultPlotIntensity(ArchiveSection):
+    m_def = Section(
+        a_h5web=H5WebAnnotation(
+            axes=['two_theta', 'omega', 'phi', 'chi'], signal='intensity'
+        )
+    )
+    intensity = Quantity(
+        type=HDF5Reference,
+        description='The count at each 2-theta value, dimensionless',
+    )
+    two_theta = Quantity(
+        type=HDF5Reference,
+        description='The 2-theta range of the diffractogram',
+    )
+    omega = Quantity(
+        type=HDF5Reference,
+        description='The omega range of the diffractogram',
+    )
+
+    def normalize(self, archive, logger):
+        super().normalize(archive, logger)
+        prefix = '/ENTRY[entry]/experiment_result'
+        try:
+            hdf5_handler = self.m_parent.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
+            return
+
+        if self.intensity is None or self.two_theta is None:
+            return
+
+        hdf5_handler.add_dataset(
+            path=f'{prefix}/plot_intensity/two_theta',
+            params=dict(
+                data=f'{prefix}/two_theta',
+                archive_path='data.results[0].plot_intensity.two_theta',
+                internal_reference=True,
+            ),
+            validate_path=False,
+        )
+        hdf5_handler.add_dataset(
+            path=f'{prefix}/plot_intensity/intensity',
+            params=dict(
+                data=f'{prefix}/intensity',
+                archive_path='data.results[0].plot_intensity.intensity',
+                internal_reference=True,
+            ),
+            validate_path=False,
+        )
+        hdf5_handler.add_attribute(
+            path=f'{prefix}/plot_intensity',
+            params=dict(
+                axes='two_theta',
+                signal='intensity',
+                NX_class='NXdata',
+            ),
+        )
+        for var_axis in ['omega', 'phi', 'chi']:
+            if self.get(var_axis) is not None:
+                hdf5_handler.add_dataset(
+                    path=f'{prefix}/plot_intensity/{var_axis}',
+                    params=dict(
+                        data=f'{prefix}/{var_axis}',
+                        archive_path=f'data.results[0].plot_intensity.{var_axis}',
+                        internal_reference=True,
+                    ),
+                    validate_path=False,
+                )
+                hdf5_handler.add_attribute(
+                    path=f'{prefix}/plot_intensity',
+                    params=dict(
+                        axes=[var_axis, 'two_theta'],
+                        signal='intensity',
+                        NX_class='NXdata',
+                    ),
+                )
+                break
+
+        hdf5_handler.write_file()
+
+
+class XRDResultPlotIntensityScatteringVector(ArchiveSection):
+    m_def = Section(
+        a_h5web=H5WebAnnotation(
+            axes=['q_parallel', 'q_perpendicular', 'q_norm'], signal='intensity'
+        )
+    )
+    intensity = Quantity(
+        type=HDF5Reference,
+        description="""
+        The count at each q value. In case of RSM, it contains interpolated values of
+        `intensity` at regularized grid of `q` vectors.
+        """,
+    )
+    q_norm = Quantity(
+        type=HDF5Reference,
+        description='The q range of the diffractogram',
+    )
+    q_parallel = Quantity(
+        type=HDF5Reference,
+        description='The regularized grid of `q_parallel` range for plotting.',
+    )
+    q_perpendicular = Quantity(
+        type=HDF5Reference,
+        description='The regularized grid of `q_perpendicular` range for plotting.',
+    )
+
+    def normalize(self, archive, logger):
+        super().normalize(archive, logger)
+        prefix = '/ENTRY[entry]/experiment_result'
+        try:
+            hdf5_handler = self.m_parent.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
+            return
+
+        if self.intensity is None:
+            return
+
+        if self.q_norm is not None:
+            hdf5_handler.add_dataset(
+                path=f'{prefix}/plot_intensity_scattering_vector/intensity',
+                params=dict(
+                    data=f'{prefix}/intensity',
+                    archive_path='data.results[0].plot_intensity_scattering_vector.intensity',
+                    internal_reference=True,
+                ),
+                validate_path=False,
+            )
+            hdf5_handler.add_dataset(
+                path=f'{prefix}/plot_intensity_scattering_vector/q_norm',
+                params=dict(
+                    data=f'{prefix}/q_norm',
+                    archive_path='data.results[0].plot_intensity_scattering_vector.q_norm',
+                    internal_reference=True,
+                ),
+                validate_path=False,
+            )
+            hdf5_handler.add_attribute(
+                path=f'{prefix}/plot_intensity_scattering_vector',
+                params=dict(
+                    axes='q_norm',
+                    signal='intensity',
+                    NX_class='NXdata',
+                ),
+            )
+        elif self.q_parallel is not None and self.q_perpendicular is not None:
+            intensity = hdf5_handler.read_dataset(self.intensity)
+            q_parallel = hdf5_handler.read_dataset(self.q_parallel)
+            q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular)
+            # q_vectors lead to irregular grid
+            # generate a regular grid using interpolation
+            x = q_parallel.to('1/angstrom').magnitude.flatten()
+            y = q_perpendicular.to('1/angstrom').magnitude.flatten()
+            x_regular = np.linspace(x.min(), x.max(), intensity.shape[0])
+            y_regular = np.linspace(y.min(), y.max(), intensity.shape[1])
+            x_grid, y_grid = np.meshgrid(x_regular, y_regular)
+            z_interpolated = griddata(
+                points=(x, y),
+                values=intensity.flatten(),
+                xi=(x_grid, y_grid),
+                method='linear',
+                fill_value=intensity.min(),
+            )
+            hdf5_handler.add_dataset(
+                path=f'{prefix}/plot_intensity_scattering_vector/q_parallel',
+                params=dict(
+                    data=x_regular,
+                    archive_path='data.results[0].plot_intensity_scattering_vector.q_parallel',
+                ),
+                validate_path=False,
+            )
+            hdf5_handler.add_dataset(
+                path=f'{prefix}/plot_intensity_scattering_vector/q_perpendicular',
+                params=dict(
+                    data=y_regular,
+                    archive_path='data.results[0].plot_intensity_scattering_vector.q_perpendicular',
+                ),
+                validate_path=False,
+            )
+            hdf5_handler.add_dataset(
+                path=f'{prefix}/plot_intensity_scattering_vector/intensity',
+                params=dict(
+                    data=z_interpolated,
+                    archive_path='data.results[0].plot_intensity_scattering_vector.intensity',
+                ),
+                validate_path=False,
+            )
+            hdf5_handler.add_attribute(
+                path=f'{prefix}/plot_intensity_scattering_vector',
+                params=dict(
+                    axes=['q_perpendicular', 'q_parallel'],
+                    signal='intensity',
+                    NX_class='NXdata',
+                ),
+            )
+        hdf5_handler.write_file()
+
+
 class XRDResult(MeasurementResult):
     """
     Section containing the result of an X-ray diffraction scan.
@@ -272,52 +475,28 @@ class XRDResult(MeasurementResult):
 
     m_def = Section()
 
-    array_index = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        description=(
-            'A placeholder for the indices of vectorial quantities. '
-            'Used as x-axis for plots within quantities.'
-        ),
-        a_display={'visible': False},
-    )
     intensity = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='dimensionless',
+        type=HDF5Reference,
         description='The count at each 2-theta value, dimensionless',
-        a_plot={'x': 'array_index', 'y': 'intensity'},
     )
     two_theta = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='deg',
+        type=HDF5Reference,
         description='The 2-theta range of the diffractogram',
-        a_plot={'x': 'array_index', 'y': 'two_theta'},
     )
     q_norm = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='meter**(-1)',
+        type=HDF5Reference,
         description='The norm of scattering vector *Q* of the diffractogram',
-        a_plot={'x': 'array_index', 'y': 'q_norm'},
     )
     omega = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='deg',
+        type=HDF5Reference,
         description='The omega range of the diffractogram',
     )
     phi = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='deg',
+        type=HDF5Reference,
         description='The phi range of the diffractogram',
     )
     chi = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*'],
-        unit='deg',
+        type=HDF5Reference,
         description='The chi range of the diffractogram',
     )
     source_peak_wavelength = Quantity(
@@ -331,11 +510,13 @@ class XRDResult(MeasurementResult):
         description='Axis scanned',
     )
     integration_time = Quantity(
-        type=np.dtype(np.float64),
-        unit='s',
-        shape=['*'],
+        type=HDF5Reference,
         description='Integration time per channel',
     )
+    plot_intensity = SubSection(section_def=XRDResultPlotIntensity)
+    plot_intensity_scattering_vector = SubSection(
+        section_def=XRDResultPlotIntensityScatteringVector
+    )
 
 
 class XRDResult1D(XRDResult):
@@ -343,9 +524,7 @@ class XRDResult1D(XRDResult):
     Section containing the result of a 1D X-ray diffraction scan.
     """
 
-    m_def = Section()
-
-    def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
+    def generate_plots(self):
         """
         Plot the 1D diffractogram.
 
@@ -358,12 +537,20 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             (dict, dict): line_linear, line_log
         """
         plots = []
-        if self.two_theta is None or self.intensity is None:
+
+        try:
+            hdf5_handler = self.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
             return plots
 
-        x = self.two_theta.to('degree').magnitude
-        y = self.intensity.magnitude
+        two_theta = hdf5_handler.read_dataset(self.two_theta)
+        intensity = hdf5_handler.read_dataset(self.intensity)
+        if two_theta is None or intensity is None:
+            return plots
 
+        x = two_theta.to('degree').magnitude
+        y = intensity.magnitude
         fig_line_linear = px.line(
             x=x,
             y=y,
@@ -449,10 +636,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             )
         )
 
-        if self.q_norm is None:
+        q_norm = hdf5_handler.read_dataset(self.q_norm)
+        if q_norm is None:
             return plots
 
-        x = self.q_norm.to('1/angstrom').magnitude
+        x = q_norm.to('1/angstrom').magnitude
         fig_line_log = px.line(
             x=x,
             y=y,
@@ -515,12 +703,45 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                 self.name = f'{self.scan_axis} Scan Result'
             else:
                 self.name = 'XRD Scan Result'
+
+        try:
+            hdf5_handler = self.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
+            return
+
         if self.source_peak_wavelength is not None:
-            self.q_norm, self.two_theta = calculate_two_theta_or_q(
+            q_norm = hdf5_handler.read_dataset(self.q_norm)
+            two_theta = hdf5_handler.read_dataset(self.two_theta)
+            q_norm, two_theta = calculate_two_theta_or_q(
                 wavelength=self.source_peak_wavelength,
-                two_theta=self.two_theta,
-                q=self.q_norm,
+                two_theta=two_theta,
+                q=q_norm,
+            )
+            hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/q_norm',
+                params=dict(
+                    data=q_norm,
+                    archive_path='data.results[0].q_norm',
+                ),
+            )
+            hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/two_theta',
+                params=dict(
+                    data=two_theta,
+                    archive_path='data.results[0].two_theta',
+                ),
             )
+            hdf5_handler.write_file()
+            self.m_setdefault('plot_intensity_scattering_vector')
+            self.plot_intensity_scattering_vector.intensity = self.intensity
+            self.plot_intensity_scattering_vector.q_norm = self.q_norm
+            self.plot_intensity_scattering_vector.normalize(archive, logger)
+
+        self.m_setdefault('plot_intensity')
+        self.plot_intensity.intensity = self.intensity
+        self.plot_intensity.two_theta = self.two_theta
+        self.plot_intensity.normalize(archive, logger)
 
 
 class XRDResultRSM(XRDResult):
@@ -528,27 +749,16 @@ class XRDResultRSM(XRDResult):
     Section containing the result of a Reciprocal Space Map (RSM) scan.
     """
 
-    m_def = Section()
     q_parallel = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*', '*'],
-        unit='meter**(-1)',
+        type=HDF5Reference,
         description='The scattering vector *Q_parallel* of the diffractogram',
     )
     q_perpendicular = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*', '*'],
-        unit='meter**(-1)',
+        type=HDF5Reference,
         description='The scattering vector *Q_perpendicular* of the diffractogram',
     )
-    intensity = Quantity(
-        type=np.dtype(np.float64),
-        shape=['*', '*'],
-        unit='dimensionless',
-        description='The count at each position, dimensionless',
-    )
 
-    def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
+    def generate_plots(self):
         """
         Plot the 2D RSM diffractogram.
 
@@ -561,14 +771,24 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             (dict, dict): json_2theta_omega, json_q_vector
         """
         plots = []
-        if self.two_theta is None or self.intensity is None or self.omega is None:
+
+        try:
+            hdf5_handler = self.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
+            return plots
+
+        two_theta = hdf5_handler.read_dataset(self.two_theta)
+        intensity = hdf5_handler.read_dataset(self.intensity)
+        omega = hdf5_handler.read_dataset(self.omega)
+        if two_theta is None or intensity is None or omega is None:
             return plots
 
         # Plot for 2theta-omega RSM
         # Zero values in intensity become -inf in log scale and are not plotted
-        x = self.omega.to('degree').magnitude
-        y = self.two_theta.to('degree').magnitude
-        z = self.intensity.magnitude
+        x = omega.to('degree').magnitude
+        y = two_theta.to('degree').magnitude
+        z = intensity.magnitude
         log_z = np.log10(z)
         x_range, y_range = get_bounding_range_2d(x, y)
 
@@ -636,9 +856,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         )
 
         # Plot for RSM in Q-vectors
-        if self.q_parallel is not None and self.q_perpendicular is not None:
-            x = self.q_parallel.to('1/angstrom').magnitude.flatten()
-            y = self.q_perpendicular.to('1/angstrom').magnitude.flatten()
+        q_parallel = hdf5_handler.read_dataset(self.q_parallel)
+        q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular)
+        if q_parallel is not None and q_perpendicular is not None:
+            x = q_parallel.to('1/angstrom').magnitude.flatten()
+            y = q_perpendicular.to('1/angstrom').magnitude.flatten()
             # q_vectors lead to irregular grid
             # generate a regular grid using interpolation
             x_regular = np.linspace(x.min(), x.max(), z.shape[0])
@@ -721,21 +943,58 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'):
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         super().normalize(archive, logger)
+
         if self.name is None:
             self.name = 'RSM Scan Result'
-        var_axis = 'omega'
-        if self.source_peak_wavelength is not None:
-            for var_axis in ['omega', 'chi', 'phi']:
-                if (
-                    self[var_axis] is not None
-                    and len(np.unique(self[var_axis].magnitude)) > 1
-                ):
-                    self.q_parallel, self.q_perpendicular = calculate_q_vectors_RSM(
-                        wavelength=self.source_peak_wavelength,
-                        two_theta=self.two_theta * np.ones_like(self.intensity),
-                        omega=self[var_axis],
-                    )
-                    break
+
+        try:
+            hdf5_handler = self.m_parent.hdf5_handler
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
+            return
+
+        var_axis = None
+        for axis in ['omega', 'chi', 'phi']:
+            axis_value = hdf5_handler.read_dataset(getattr(self, axis))
+            if axis_value is not None and len(np.unique(axis_value.magnitude)) > 1:
+                var_axis = axis
+                break
+
+        if self.source_peak_wavelength is not None and var_axis is not None:
+            two_theta = hdf5_handler.read_dataset(self.two_theta)
+            intensity = hdf5_handler.read_dataset(self.intensity)
+            q_parallel, q_perpendicular = calculate_q_vectors_rsm(
+                wavelength=self.source_peak_wavelength,
+                two_theta=two_theta * np.ones_like(intensity),
+                omega=hdf5_handler.read_dataset(getattr(self, var_axis)),
+            )
+            hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/q_parallel',
+                params=dict(
+                    data=q_parallel,
+                    archive_path='data.results[0].q_parallel',
+                ),
+            )
+            hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/q_perpendicular',
+                params=dict(
+                    data=q_perpendicular,
+                    archive_path='data.results[0].q_perpendicular',
+                ),
+            )
+            hdf5_handler.write_file()
+            self.m_setdefault('plot_intensity_scattering_vector')
+            self.plot_intensity_scattering_vector.intensity = self.intensity
+            self.plot_intensity_scattering_vector.q_parallel = self.q_parallel
+            self.plot_intensity_scattering_vector.q_perpendicular = self.q_perpendicular
+            self.plot_intensity_scattering_vector.normalize(archive, logger)
+
+        if var_axis is not None:
+            self.m_setdefault('plot_intensity')
+            self.plot_intensity.intensity = self.intensity
+            self.plot_intensity.two_theta = self.two_theta
+            self.plot_intensity.m_set(var_axis, getattr(self, var_axis))
+            self.plot_intensity.normalize(archive, logger)
 
 
 class XRayDiffraction(Measurement):
@@ -802,31 +1061,39 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             archive.results = Results()
         if not archive.results.properties:
             archive.results.properties = Properties()
+        if not archive.results.method:
+            archive.results.method = Method(
+                method_name='XRD',
+                measurement=MeasurementMethod(
+                    xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name)
+                ),
+            )
+
+        try:
+            hdf5_handler = self.hdf5_handler
+        except AttributeError:
+            return
         if not archive.results.properties.structural:
             diffraction_patterns = []
             for result in self.results:
-                if len(result.intensity.shape) == 1:
+                intensity = hdf5_handler.read_dataset(result.intensity)
+                if len(intensity.shape) == 1:
+                    two_theta = hdf5_handler.read_dataset(result.two_theta)
+                    q_norm = hdf5_handler.read_dataset(result.q_norm)
                     diffraction_patterns.append(
                         DiffractionPattern(
                             incident_beam_wavelength=result.source_peak_wavelength,
-                            two_theta_angles=result.two_theta,
-                            intensity=result.intensity,
-                            q_vector=result.q_norm,
+                            two_theta_angles=two_theta,
+                            intensity=intensity,
+                            q_vector=q_norm,
                         )
                     )
             archive.results.properties.structural = StructuralProperties(
                 diffraction_pattern=diffraction_patterns
             )
-        if not archive.results.method:
-            archive.results.method = Method(
-                method_name='XRD',
-                measurement=MeasurementMethod(
-                    xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name)
-                ),
-            )
 
 
-class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
+class ELNXRayDiffraction(XRayDiffraction, EntryData):
     """
     Example section for how XRayDiffraction can be implemented with a general reader for
     common XRD file types.
@@ -841,6 +1108,12 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
         a_template={
             'measurement_identifiers': {},
         },
+        a_h5web=H5WebAnnotation(
+            paths=[
+                'results/0/plot_intensity',
+                'results/0/plot_intensity_scattering_vector',
+            ]
+        ),
     )
     data_file = Quantity(
         type=str,
@@ -849,6 +1122,14 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
             component=ELNComponentEnum.FileEditQuantity,
         ),
     )
+    auxiliary_file = Quantity(
+        type=str,
+        description='Auxiliary file (like .h5 or .nxs) containing the entry data.',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.FileEditQuantity,
+        ),
+    )
+    hdf5_handler = None
     measurement_identifiers = SubSection(
         section_def=ReadableIdentifiers,
     )
@@ -856,21 +1137,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection):
     diffraction_method_name.m_annotations['eln'] = ELNAnnotation(
         component=ELNComponentEnum.EnumEditQuantity,
     )
-    generate_nexus_file = Quantity(
-        type=bool,
-        description='Whether or not to generate a NeXus output file (if possible).',
-        default=True,
-        a_eln=ELNAnnotation(
-            component=ELNComponentEnum.BoolEditQuantity,
-            label='Generate NeXus file',
-        ),
-    )
 
     def get_read_write_functions(self) -> tuple[Callable, Callable]:
         """
         Method for getting the correct read and write functions for the current data
         file.
-
         Returns:
             tuple[Callable, Callable]: The read, write functions.
         """
@@ -899,49 +1170,81 @@ def write_xrd_data(
         metadata_dict: dict = xrd_dict.get('metadata', {})
         source_dict: dict = metadata_dict.get('source', {})
 
-        scan_type = metadata_dict.get('scan_type', None)
-        if scan_type == 'line':
-            result = XRDResult1D(
-                intensity=xrd_dict.get('intensity', None),
-                two_theta=xrd_dict.get('2Theta', None),
-                omega=xrd_dict.get('Omega', None),
-                chi=xrd_dict.get('Chi', None),
-                phi=xrd_dict.get('Phi', None),
-                scan_axis=metadata_dict.get('scan_axis', None),
-                integration_time=xrd_dict.get('countTime', None),
-            )
-            result.normalize(archive, logger)
+        scan_type = metadata_dict.get('scan_type')
+        if scan_type not in ['line', 'rsm']:
+            logger.error(f'Scan type `{scan_type}` is not supported.')
+            return
 
+        # Create a new result section
+        results = []
+        result = None
+        if scan_type == 'line':
+            result = XRDResult1D()
         elif scan_type == 'rsm':
-            result = XRDResultRSM(
-                intensity=xrd_dict.get('intensity', None),
-                two_theta=xrd_dict.get('2Theta', None),
-                omega=xrd_dict.get('Omega', None),
-                chi=xrd_dict.get('Chi', None),
-                phi=xrd_dict.get('Phi', None),
-                scan_axis=metadata_dict.get('scan_axis', None),
-                integration_time=xrd_dict.get('countTime', None),
+            result = XRDResultRSM()
+
+        if result is not None:
+            result.scan_axis = metadata_dict.get('scan_axis')
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/intensity',
+                params=dict(
+                    data=xrd_dict.get('intensity'),
+                    archive_path='data.results[0].intensity',
+                ),
+            )
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/two_theta',
+                params=dict(
+                    data=xrd_dict.get('2Theta'),
+                    archive_path='data.results[0].two_theta',
+                ),
+            )
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/omega',
+                params=dict(
+                    data=xrd_dict.get('Omega'),
+                    archive_path='data.results[0].omega',
+                ),
+            )
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/chi',
+                params=dict(
+                    data=xrd_dict.get('Chi'),
+                    archive_path='data.results[0].chi',
+                ),
+            )
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_result/phi',
+                params=dict(
+                    data=xrd_dict.get('Phi'),
+                    archive_path='data.results[0].phi',
+                ),
+            )
+            self.hdf5_handler.add_dataset(
+                path='/ENTRY[entry]/experiment_config/count_time',
+                params=dict(
+                    data=xrd_dict.get('countTime'),
+                    archive_path='data.results[0].integration_time',
+                ),
             )
             result.normalize(archive, logger)
-        else:
-            raise NotImplementedError(f'Scan type `{scan_type}` is not supported.')
+            results.append(result)
 
         source = XRayTubeSource(
-            xray_tube_material=source_dict.get('anode_material', None),
-            kalpha_one=source_dict.get('kAlpha1', None),
-            kalpha_two=source_dict.get('kAlpha2', None),
-            ratio_kalphatwo_kalphaone=source_dict.get('ratioKAlpha2KAlpha1', None),
-            kbeta=source_dict.get('kBeta', None),
-            xray_tube_voltage=source_dict.get('voltage', None),
-            xray_tube_current=source_dict.get('current', None),
+            xray_tube_material=source_dict.get('anode_material'),
+            kalpha_one=source_dict.get('kAlpha1'),
+            kalpha_two=source_dict.get('kAlpha2'),
+            ratio_kalphatwo_kalphaone=source_dict.get('ratioKAlpha2KAlpha1'),
+            kbeta=source_dict.get('kBeta'),
+            xray_tube_voltage=source_dict.get('voltage'),
+            xray_tube_current=source_dict.get('current'),
         )
         source.normalize(archive, logger)
-
         xrd_settings = XRDSettings(source=source)
         xrd_settings.normalize(archive, logger)
 
         samples = []
-        if metadata_dict.get('sample_id', None) is not None:
+        if metadata_dict.get('sample_id') is not None:
             sample = CompositeSystemReference(
                 lab_id=metadata_dict['sample_id'],
             )
@@ -949,12 +1252,23 @@ def write_xrd_data(
             samples.append(sample)
 
         xrd = ELNXRayDiffraction(
-            results=[result],
+            results=results,
             xrd_settings=xrd_settings,
             samples=samples,
         )
+
         merge_sections(self, xrd, logger)
 
+    def backward_compatibility(self):
+        """
+        Method for backward compatibility.
+        """
+        # Migration to using HDF5References: removing existing results
+        if self.get('results'):
+            self.results = []
+        if self.get('figures'):
+            self.figures = []
+
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         """
         The normalize function of the `ELNXRayDiffraction` section.
@@ -964,7 +1278,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             normalized.
             logger (BoundLogger): A structlog logger.
         """
+        self.backward_compatibility()
         if self.data_file is not None:
+            self.auxiliary_file = f'{self.data_file}.nxs'
+            self.hdf5_handler = HDF5Handler(
+                filename=self.auxiliary_file,
+                archive=archive,
+                logger=logger,
+                valid_dataset_paths=NEXUS_DATASET_PATHS,
+                nexus=True,
+            )
             read_function, write_function = self.get_read_write_functions()
             if read_function is None or write_function is None:
                 logger.warn(
@@ -974,15 +1297,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                 with archive.m_context.raw_file(self.data_file) as file:
                     xrd_dict = read_function(file.name, logger)
                 write_function(xrd_dict, archive, logger)
+                self.hdf5_handler.write_file()
+                if self.hdf5_handler.data_file != self.auxiliary_file:
+                    self.auxiliary_file = self.hdf5_handler.data_file
         super().normalize(archive, logger)
-        if not self.results:
-            return
-
-        scan_type = xrd_dict.get('metadata', {}).get('scan_type', None)
-        if self.generate_nexus_file and self.data_file is not None:
-            write_nx_section_and_create_file(archive, logger, scan_type=scan_type)
-
-        self.figures = self.results[0].generate_plots(archive, logger)
 
 
 class RawFileXRDData(EntryData):

From 2d02036a17090df6670e0750b0e79a9bfdcad60d Mon Sep 17 00:00:00 2001
From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com>
Date: Fri, 20 Dec 2024 14:33:03 +0100
Subject: [PATCH 17/41] Adding nexus in ref (#150)

* Remove the NeXus file before regenerating it.

* Add a reference to the NeXus entry.

* Address PR review comments.
---
 src/nomad_measurements/utils.py      | 146 +++++++++++++++++++++++++--
 src/nomad_measurements/xrd/nx.py     |  33 ++++++
 src/nomad_measurements/xrd/schema.py |  20 ++++
 3 files changed, 189 insertions(+), 10 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 876d25b4..56066f63 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 #
 import collections
+import copy
 import os.path
 import re
 from typing import (
@@ -30,6 +31,14 @@
 from nomad.datamodel.hdf5 import HDF5Reference
 from nomad.units import ureg
 from pydantic import BaseModel, Field
+from pynxtools.dataconverter.helpers import (
+    generate_template_from_nxdl,
+    get_nxdl_root_and_path,
+)
+from pynxtools.dataconverter.template import Template
+from pynxtools.dataconverter.writer import Writer as pynxtools_writer
+
+from nomad_measurements.xrd.nx import CONCEPT_MAP
 
 if TYPE_CHECKING:
     from nomad.datamodel.data import (
@@ -43,6 +52,10 @@
     )
 
 
+class NXFileGenerationError(Exception):
+    pass
+
+
 def get_reference(upload_id: str, entry_id: str) -> str:
     return f'../uploads/{upload_id}/archive/{entry_id}#data'
 
@@ -347,9 +360,13 @@ def write_file(self):
             except Exception as e:
                 self.nexus = False
                 self.logger.warning(
-                    f'Encountered "{e}" error while creating nexus file. '
-                    'Creating h5 file instead.'
+                    f"""NeXusFileGenerationError: Encountered '{e}' error while creating
+                    nexus file. Creating h5 file instead."""
                 )
+                if self.archive.m_context.raw_path_exists(self.data_file):
+                    os.remove(
+                        os.path.join(self.archive.m_context.raw_path(), self.data_file)
+                    )
                 self._write_hdf5_file()
         else:
             self._write_hdf5_file()
@@ -360,14 +377,62 @@ def _write_nx_file(self):
         to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus
         view of the data in addition to storing array data.
         """
-        if self.data_file.endswith('.h5'):
-            self.data_file = self.data_file.replace('.h5', '.nxs')
-        raise NotImplementedError('Method `write_nx_file` is not implemented.')
-        # TODO add archive data to `hdf5_data_dict` before creating the nexus file. Use
-        # `populate_hdf5_data_dict` method for each quantity that is needed in .nxs
-        # file. Create a NeXus file with the data in `hdf5_data_dict`.
-        # One issue here is as we populate the `hdf5_data_dict` with the archive data,
-        # we will always have to over write the nexus file
+        from nomad.processing.data import Entry
+
+        app_def = 'NXxrd_pan'
+        nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def)
+        template = Template()
+        generate_template_from_nxdl(nxdl_root, template)
+        attr_dict = {}
+        dataset_dict = {}
+        self.populate_nx_dataset_and_attribute(
+            attr_dict=attr_dict, dataset_dict=dataset_dict
+        )
+        for nx_path, dset_original in list(self._hdf5_datasets.items()) + list(
+            dataset_dict.items()
+        ):
+            dset = copy.deepcopy(dset_original)
+            if dset.internal_reference:
+                # convert to the nexus type link
+                dset.data = {'link': self._remove_nexus_annotations(dset.data)}
+
+            try:
+                template[nx_path] = dset.data
+            except KeyError:
+                template['optional'][nx_path] = dset.data
+
+            hdf5_path = self._remove_nexus_annotations(nx_path)
+            self._set_hdf5_reference(
+                self.archive,
+                dset.archive_path,
+                f'/uploads/{self.archive.m_context.upload_id}/raw'
+                f'/{self.data_file}#{hdf5_path}',
+            )
+        for nx_path, attr_d in list(self._hdf5_attributes.items()) + list(
+            attr_dict.items()
+        ):
+            for attr_k, attr_v in attr_d.items():
+                if attr_v != 'dimensionless' and attr_v:
+                    try:
+                        template[f'{nx_path}/@{attr_k}'] = attr_v
+                    except KeyError:
+                        template['optional'][f'{nx_path}/@{attr_k}'] = attr_v
+
+        nx_full_file_path = os.path.join(
+            self.archive.m_context.raw_path(), self.data_file
+        )
+
+        if self.archive.m_context.raw_path_exists(self.data_file):
+            os.remove(nx_full_file_path)
+        pynxtools_writer(
+            data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path
+        ).write()
+
+        entry_list = Entry.objects(
+            upload_id=self.archive.m_context.upload_id, mainfile=self.data_file
+        )
+        if not entry_list:
+            self.archive.m_context.process_updated_raw_file(self.data_file)
 
     def _write_hdf5_file(self):  # noqa: PLR0912
         """
@@ -435,6 +500,67 @@ def _write_hdf5_file(self):  # noqa: PLR0912
         self._hdf5_datasets = collections.OrderedDict()
         self._hdf5_attributes = collections.OrderedDict()
 
+    @staticmethod
+    def walk_through_object(parent_obj, attr_chain):
+        """
+        Walk though the object until reach the leaf.
+
+        Args:
+            parent_obj: This is a python obj.
+                e.g.Arvhive
+            attr_chain: Dot separated obj chain.
+                e.g. 'archive.data.xrd_settings.source.xray_tube_material'
+            default: A value to be returned by default, if not data is found.
+        """
+        if parent_obj is None:
+            return parent_obj
+
+        if isinstance(attr_chain, str) and attr_chain.startswith('archive.'):
+            parts = attr_chain.split('.')
+            child_obj = None
+            for part in parts[1:]:
+                child_nm = part
+                if '[' in child_nm:
+                    child_nm, index = child_nm.split('[')
+                    index = int(index[:-1])
+                    # section always exists
+                    child_obj = getattr(parent_obj, child_nm)[index]
+                else:
+                    child_obj = getattr(parent_obj, child_nm, None)
+                if child_obj is None:
+                    return None
+                parent_obj = child_obj
+
+            return child_obj
+
+    def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
+        """Construct datasets and attributes for nexus and populate."""
+
+        for nx_path, arch_path in CONCEPT_MAP.items():
+            if arch_path.startswith('archive.'):
+                data = self.walk_through_object(self.archive, arch_path)
+            else:
+                data = arch_path  # default value
+
+            dataset = DatasetModel(
+                data=data,
+            )
+
+            if (
+                isinstance(data, pint.Quantity)
+                and str(data.units) != 'dimensionless'
+                and str(data.units)
+            ):
+                attr_tmp = {nx_path: dict(units=str(data.units))}
+                attr_dict |= attr_tmp
+                dataset.data = data.magnitude
+
+            l_part, r_part = nx_path.split('/', 1)
+            if r_part.startswith('@'):
+                attr_dict[l_part] = {r_part.replace('@', ''): data}
+            else:
+                dataset_dict[nx_path] = dataset
+
     @staticmethod
     def _remove_nexus_annotations(path: str) -> str:
         """
diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
index db73af2f..21474aea 100644
--- a/src/nomad_measurements/xrd/nx.py
+++ b/src/nomad_measurements/xrd/nx.py
@@ -37,3 +37,36 @@
     '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
     '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
 ]
+
+
+CONCEPT_MAP = {
+    '/ENTRY[entry]/@default': 'experiment_result',
+    '/ENTRY[entry]/definition': 'NXxrd_pan',
+    '/ENTRY[entry]/method': 'archive.data.method',
+    '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
+    '/ENTRY[entry]/experiment_result/@signal': 'intensity',
+    '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': (
+        'archive.data.results[0].scan_axis'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': (
+        'archive.data.xrd_settings.source.xray_tube_material'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': (
+        'archive.data.xrd_settings.source.xray_tube_current'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': (
+        'archive.data.xrd_settings.source.xray_tube_voltage'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': (
+        'archive.data.xrd_settings.source.kalpha_one'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': (
+        'archive.data.xrd_settings.source.kalpha_two'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': (
+        'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone'
+    ),
+    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': (
+        'archive.data.xrd_settings.source.kbeta'
+    ),
+}
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 80ec1dc3..7ba2f994 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -73,6 +74,8 @@
 from nomad_measurements.utils import (
     HDF5Handler,
     get_bounding_range_2d,
+    get_entry_id_from_file_name,
+    get_reference,
     merge_sections,
 )
 from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS
@@ -1137,6 +1140,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
     diffraction_method_name.m_annotations['eln'] = ELNAnnotation(
         component=ELNComponentEnum.EnumEditQuantity,
     )
+    nexus_results = Quantity(
+        type=ArchiveSection,
+        description='Reference to the NeXus entry.',
+        a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity),
+    )
 
     def get_read_write_functions(self) -> tuple[Callable, Callable]:
         """
@@ -1300,6 +1308,18 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                 self.hdf5_handler.write_file()
                 if self.hdf5_handler.data_file != self.auxiliary_file:
                     self.auxiliary_file = self.hdf5_handler.data_file
+
+        if archive.m_context.raw_path_exists(
+            self.auxiliary_file
+        ) and self.auxiliary_file.endswith('.nxs'):
+            nx_entry_id = get_entry_id_from_file_name(
+                archive=archive, file_name=self.auxiliary_file
+            )
+            ref_to_nx_entry_data = get_reference(
+                archive.metadata.upload_id, nx_entry_id
+            )
+            self.nexus_results = f'{ref_to_nx_entry_data}'
+
         super().normalize(archive, logger)
 
 
From 62569dd35ac9dc0ad8395c2eab2ab2afb95ead95 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Fri, 20 Dec 2024 17:29:38 +0100
Subject: [PATCH 18/41] Move common functionality to resolve_path

---
 src/nomad_measurements/utils.py | 101 +++++++++++++++-----------------
 1 file changed, 46 insertions(+), 55 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 56066f63..1e707973 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -500,51 +500,16 @@ def _write_hdf5_file(self):  # noqa: PLR0912
         self._hdf5_datasets = collections.OrderedDict()
         self._hdf5_attributes = collections.OrderedDict()
 
-    @staticmethod
-    def walk_through_object(parent_obj, attr_chain):
-        """
-        Walk though the object until reach the leaf.
-
-        Args:
-            parent_obj: This is a python obj.
-                e.g.Arvhive
-            attr_chain: Dot separated obj chain.
-                e.g. 'archive.data.xrd_settings.source.xray_tube_material'
-            default: A value to be returned by default, if not data is found.
-        """
-        if parent_obj is None:
-            return parent_obj
-
-        if isinstance(attr_chain, str) and attr_chain.startswith('archive.'):
-            parts = attr_chain.split('.')
-            child_obj = None
-            for part in parts[1:]:
-                child_nm = part
-                if '[' in child_nm:
-                    child_nm, index = child_nm.split('[')
-                    index = int(index[:-1])
-                    # section always exists
-                    child_obj = getattr(parent_obj, child_nm)[index]
-                else:
-                    child_obj = getattr(parent_obj, child_nm, None)
-                if child_obj is None:
-                    return None
-                parent_obj = child_obj
-
-            return child_obj
-
     def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
         """Construct datasets and attributes for nexus and populate."""
 
         for nx_path, arch_path in CONCEPT_MAP.items():
             if arch_path.startswith('archive.'):
-                data = self.walk_through_object(self.archive, arch_path)
+                data = resolve_path(self.archive, arch_path.split('archive.', 1)[1])
             else:
                 data = arch_path  # default value
 
-            dataset = DatasetModel(
-                data=data,
-            )
+            dataset = DatasetModel(data=data)
 
             if (
                 isinstance(data, pint.Quantity)
@@ -593,33 +558,59 @@ def _set_hdf5_reference(
         section: 'ArchiveSection' = None, path: str = None, ref: str = None
     ):
         """
-        Method for setting a HDF5Reference quantity in a section. It can handle
-        nested quantities and repeatable sections, provided that the quantity itself
-        is of type `HDF5Reference`.
+        Method for setting a HDF5Reference quantity in a section.
         For example, one can set the reference for a quantity path like
         `data.results[0].intensity`.
+        In case the section is not initialized, the method returns without setting
+        the reference.
 
         Args:
             section (Section): The NOMAD section containing the quantity.
             path (str): The path to the quantity.
             ref (str): The reference to the HDF5 dataset.
         """
-        # TODO handle the case when section in the path is not initialized
-
         if not section or not path or not ref:
             return
-        attr = section
-        path = path.split('.')
-        quantity_name = path.pop()
-
-        for subpath in path:
-            if re.match(r'.*\[.*\]', subpath):
-                index = int(subpath.split('[')[1].split(']')[0])
-                attr = attr.m_get(subpath.split('[')[0], index=index)
-            else:
-                attr = attr.m_get(subpath)
 
-        if isinstance(
-            attr.m_get_quantity_definition(quantity_name).type, HDF5Reference
+        section_path, quantity_name = path.rsplit('.', 1)
+        resolved_section = resolve_path(section, section_path)
+
+        if resolved_section and isinstance(
+            resolved_section.m_get_quantity_definition(quantity_name).type,
+            HDF5Reference,
         ):
-            attr.m_set(quantity_name, ref)
+            resolved_section.m_set(quantity_name, ref)
+
+
+def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None):
+    """
+    Resolves the attribute path within the given NOMAD section.
+
+    Args:
+        section (ArchiveSection): The NOMAD section.
+        path (str): The dot-separated path to the attribute.
+        logger (BoundLogger): A structlog logger.
+
+    Returns:
+        The resolved section or attribute or None if not found.
+    """
+    attr = section
+    parts = path.split('.')
+    try:
+        for part in parts:
+            attr_path = part
+            if re.match(r'.*\[.*\]', attr_path):
+                attr_path, index = part[:-1].split('[')
+                index = int(index)
+            else:
+                index = None
+            attr = attr.m_get(attr_path, index=index)
+    except (KeyError, ValueError, AttributeError) as e:
+        if logger:
+            logger.error(
+                f'Unable to resolve part "{part}" of the given path "{path}". '
+                f'Encountered error "{e}".'
+            )
+        return None
+
+    return attr

From 6e47b5bd118f6dcaf52bda3d6c8b282800af9493 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 7 Jan 2025 16:26:24 +0100
Subject: [PATCH 19/41] Allow reading with dataset_path alone

---
 src/nomad_measurements/utils.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 1e707973..11b0a8ff 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -269,7 +269,7 @@ def add_dataset(
             validate_path (bool): If True, the dataset path is validated.
         """
         if not params:
-            self.logger.warning('Dataset `params` must be provided.')
+            self.logger.warning('Dataset `params` not provided.')
             return
 
         dataset = DatasetModel(
@@ -309,7 +309,7 @@ def add_attribute(
             params (dict): The attributes to be added.
         """
         if not params:
-            self.logger.warning('Attribute `params` must be provided.')
+            self.logger.warning('Attribute `params` not provided.')
             return
         self._hdf5_attributes[path] = params
 
@@ -324,7 +324,10 @@ def read_dataset(self, path: str):
         """
         if path is None:
             return
-        file_path, dataset_path = path.split('#')
+        if '#' not in path:
+            file_path, dataset_path = None, path
+        else:
+            file_path, dataset_path = path.rsplit('#', 1)
 
         # find path in the instance variables
         value = None
@@ -336,6 +339,8 @@ def read_dataset(self, path: str):
                     value *= ureg(units)
             return value
 
+        if not file_path:
+            return
         file_name = file_path.rsplit('/raw/', 1)[1]
         with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5:
             if dataset_path not in h5:

From 56abe1e0ed0529918872a8be64b5bd8cc56db380 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 14 Jan 2025 12:06:46 +0100
Subject: [PATCH 20/41] Allow reading with archive paths

---
 src/nomad_measurements/utils.py | 40 ++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 11b0a8ff..8158e1df 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -243,6 +243,7 @@ def __init__(
 
         self._hdf5_datasets = collections.OrderedDict()
         self._hdf5_attributes = collections.OrderedDict()
+        self._hdf5_path_map = collections.OrderedDict()
 
     def add_dataset(
         self,
@@ -294,6 +295,8 @@ def add_dataset(
             dataset.data = dataset.data.magnitude
 
         self._hdf5_datasets[path] = dataset
+        if dataset.archive_path:
+            self._hdf5_path_map[dataset.archive_path] = path
 
     def add_attribute(
         self,
@@ -313,7 +316,7 @@ def add_attribute(
             return
         self._hdf5_attributes[path] = params
 
-    def read_dataset(self, path: str):
+    def read_dataset(self, path: str, is_archive_path: bool = False):
         """
         Returns the dataset at the given path. If the quantity has `units` as an
         attribute, tries to returns a `pint.Quantity`.
@@ -321,9 +324,14 @@ def read_dataset(self, path: str):
 
         Args:
             path (str): The dataset path in the HDF5 file.
+            is_archive_path (bool): If True, the path is resolved from the archive path.
         """
         if path is None:
             return
+        if is_archive_path and path in self._hdf5_path_map:
+            path = self._hdf5_path_map[path]
+            if path is None:
+                return
         if '#' not in path:
             file_path, dataset_path = None, path
         else:
@@ -339,20 +347,22 @@ def read_dataset(self, path: str):
                     value *= ureg(units)
             return value
 
-        if not file_path:
-            return
-        file_name = file_path.rsplit('/raw/', 1)[1]
-        with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5:
-            if dataset_path not in h5:
-                self.logger.warning(f'Dataset "{dataset_path}" not found.')
-            else:
-                value = h5[dataset_path][...]
-                try:
-                    units = h5[dataset_path].attrs['units']
-                    value *= ureg(units)
-                except KeyError:
-                    pass
-        return value
+        # find path in the HDF5 file
+        if file_path:
+            file_name = file_path.rsplit('/raw/', 1)[1]
+            with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5:
+                if dataset_path not in h5:
+                    self.logger.warning(f'Dataset "{dataset_path}" not found.')
+                else:
+                    value = h5[dataset_path][...]
+                    try:
+                        units = h5[dataset_path].attrs['units']
+                        value *= ureg(units)
+                    except KeyError:
+                        pass
+                return value
+
+        return None
 
     def write_file(self):
         """

From 3b3cde9cf801a93617b991e61a2a9bf484d29336 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 14 Jan 2025 12:07:19 +0100
Subject: [PATCH 21/41] Never reset the instance variables

---
 src/nomad_measurements/utils.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 8158e1df..443a1a39 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -511,10 +511,6 @@ def _write_hdf5_file(self):  # noqa: PLR0912
                 else:
                     self.logger.warning(f'Path "{key}" not found to add attribute.')
 
-        # reset hdf5 datasets and atttributes
-        self._hdf5_datasets = collections.OrderedDict()
-        self._hdf5_attributes = collections.OrderedDict()
-
     def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
         """Construct datasets and attributes for nexus and populate."""
 

From 162f5877e3f9cfc2419bb54fcc9844a9666f05a9 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 14 Jan 2025 12:08:02 +0100
Subject: [PATCH 22/41] Final file writing when the instance is deleted

---
 src/nomad_measurements/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 443a1a39..f8672226 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -592,6 +592,10 @@ def _set_hdf5_reference(
         ):
             resolved_section.m_set(quantity_name, ref)
 
+    def __del__(self):
+        if self._hdf5_datasets or self._hdf5_attributes:
+            self.write_file()
+
 
 def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None):
     """

From 9573636866a5c1dd9c0d8943b8b9de7593c51b70 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 14 Jan 2025 15:48:38 +0100
Subject: [PATCH 23/41] Reduce write_file calls to one

---
 src/nomad_measurements/xrd/schema.py | 168 ++++++++++++++++++---------
 1 file changed, 116 insertions(+), 52 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 7ba2f994..5b1089d8 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -290,6 +290,14 @@ class XRDResultPlotIntensity(ArchiveSection):
         type=HDF5Reference,
         description='The omega range of the diffractogram',
     )
+    phi = Quantity(
+        type=HDF5Reference,
+        description='The phi range of the diffractogram',
+    )
+    chi = Quantity(
+        type=HDF5Reference,
+        description='The chi range of the diffractogram',
+    )
 
     def normalize(self, archive, logger):
         super().normalize(archive, logger)
@@ -300,9 +308,6 @@ def normalize(self, archive, logger):
         except (AttributeError, AssertionError):
             return
 
-        if self.intensity is None or self.two_theta is None:
-            return
-
         hdf5_handler.add_dataset(
             path=f'{prefix}/plot_intensity/two_theta',
             params=dict(
@@ -329,8 +334,15 @@ def normalize(self, archive, logger):
                 NX_class='NXdata',
             ),
         )
+        if isinstance(self.m_parent, XRDResult1D):
+            return
+
         for var_axis in ['omega', 'phi', 'chi']:
-            if self.get(var_axis) is not None:
+            var_axis_data = hdf5_handler.read_dataset(
+                path=f'data.results[0].{var_axis}',
+                is_archive_path=True,
+            )
+            if var_axis_data is not None:
                 hdf5_handler.add_dataset(
                     path=f'{prefix}/plot_intensity/{var_axis}',
                     params=dict(
@@ -350,8 +362,6 @@ def normalize(self, archive, logger):
                 )
                 break
 
-        hdf5_handler.write_file()
-
 
 class XRDResultPlotIntensityScatteringVector(ArchiveSection):
     m_def = Section(
@@ -388,10 +398,24 @@ def normalize(self, archive, logger):
         except (AttributeError, AssertionError):
             return
 
-        if self.intensity is None:
-            return
+        intensity = hdf5_handler.read_dataset(
+            path='data.results[0].intensity',
+            is_archive_path=True,
+        )
+        q_norm = hdf5_handler.read_dataset(
+            path='data.results[0].q_norm',
+            is_archive_path=True,
+        )
+        q_parallel = hdf5_handler.read_dataset(
+            path='data.results[0].q_parallel',
+            is_archive_path=True,
+        )
+        q_perpendicular = hdf5_handler.read_dataset(
+            path='data.results[0].q_perpendicular',
+            is_archive_path=True,
+        )
 
-        if self.q_norm is not None:
+        if q_norm is not None:
             hdf5_handler.add_dataset(
                 path=f'{prefix}/plot_intensity_scattering_vector/intensity',
                 params=dict(
@@ -418,10 +442,7 @@ def normalize(self, archive, logger):
                     NX_class='NXdata',
                 ),
             )
-        elif self.q_parallel is not None and self.q_perpendicular is not None:
-            intensity = hdf5_handler.read_dataset(self.intensity)
-            q_parallel = hdf5_handler.read_dataset(self.q_parallel)
-            q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular)
+        elif q_parallel is not None and q_perpendicular is not None:
             # q_vectors lead to irregular grid
             # generate a regular grid using interpolation
             x = q_parallel.to('1/angstrom').magnitude.flatten()
@@ -468,7 +489,6 @@ def normalize(self, archive, logger):
                     NX_class='NXdata',
                 ),
             )
-        hdf5_handler.write_file()
 
 
 class XRDResult(MeasurementResult):
@@ -547,8 +567,14 @@ def generate_plots(self):
         except (AttributeError, AssertionError):
             return plots
 
-        two_theta = hdf5_handler.read_dataset(self.two_theta)
-        intensity = hdf5_handler.read_dataset(self.intensity)
+        two_theta = hdf5_handler.read_dataset(
+            path='data.results[0].two_theta',
+            is_archive_path=True,
+        )
+        intensity = hdf5_handler.read_dataset(
+            path='data.results[0].intensity',
+            is_archive_path=True,
+        )
         if two_theta is None or intensity is None:
             return plots
 
@@ -639,7 +665,10 @@ def generate_plots(self):
             )
         )
 
-        q_norm = hdf5_handler.read_dataset(self.q_norm)
+        q_norm = hdf5_handler.read_dataset(
+            path='data.results[0].q_norm',
+            is_archive_path=True,
+        )
         if q_norm is None:
             return plots
 
@@ -713,9 +742,22 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         except (AttributeError, AssertionError):
             return
 
+        intensity = hdf5_handler.read_dataset(
+            path='data.results[0].intensity',
+            is_archive_path=True,
+        )
+        two_theta = hdf5_handler.read_dataset(
+            path='data.results[0].two_theta',
+            is_archive_path=True,
+        )
+        if intensity is None or two_theta is None:
+            return
+
         if self.source_peak_wavelength is not None:
-            q_norm = hdf5_handler.read_dataset(self.q_norm)
-            two_theta = hdf5_handler.read_dataset(self.two_theta)
+            q_norm = hdf5_handler.read_dataset(
+                path='data.results[0].q_norm',
+                is_archive_path=True,
+            )
             q_norm, two_theta = calculate_two_theta_or_q(
                 wavelength=self.source_peak_wavelength,
                 two_theta=two_theta,
@@ -735,15 +777,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                     archive_path='data.results[0].two_theta',
                 ),
             )
-            hdf5_handler.write_file()
             self.m_setdefault('plot_intensity_scattering_vector')
-            self.plot_intensity_scattering_vector.intensity = self.intensity
-            self.plot_intensity_scattering_vector.q_norm = self.q_norm
             self.plot_intensity_scattering_vector.normalize(archive, logger)
 
         self.m_setdefault('plot_intensity')
-        self.plot_intensity.intensity = self.intensity
-        self.plot_intensity.two_theta = self.two_theta
         self.plot_intensity.normalize(archive, logger)
 
 
@@ -781,9 +818,18 @@ def generate_plots(self):
         except (AttributeError, AssertionError):
             return plots
 
-        two_theta = hdf5_handler.read_dataset(self.two_theta)
-        intensity = hdf5_handler.read_dataset(self.intensity)
-        omega = hdf5_handler.read_dataset(self.omega)
+        two_theta = hdf5_handler.read_dataset(
+            path='data.results[0].two_theta',
+            is_archive_path=True,
+        )
+        intensity = hdf5_handler.read_dataset(
+            path='data.results[0].intensity',
+            is_archive_path=True,
+        )
+        omega = hdf5_handler.read_dataset(
+            path='data.results[0].omega',
+            is_archive_path=True,
+        )
         if two_theta is None or intensity is None or omega is None:
             return plots
 
@@ -859,8 +905,14 @@ def generate_plots(self):
         )
 
         # Plot for RSM in Q-vectors
-        q_parallel = hdf5_handler.read_dataset(self.q_parallel)
-        q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular)
+        q_parallel = hdf5_handler.read_dataset(
+            path='data.results[0].q_parallel',
+            is_archive_path=True,
+        )
+        q_perpendicular = hdf5_handler.read_dataset(
+            path='data.results[0].q_perpendicular',
+            is_archive_path=True,
+        )
         if q_parallel is not None and q_perpendicular is not None:
             x = q_parallel.to('1/angstrom').magnitude.flatten()
             y = q_perpendicular.to('1/angstrom').magnitude.flatten()
@@ -956,20 +1008,34 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         except (AttributeError, AssertionError):
             return
 
+        intensity = hdf5_handler.read_dataset(
+            path='data.results[0].intensity',
+            is_archive_path=True,
+        )
+        two_theta = hdf5_handler.read_dataset(
+            path='data.results[0].two_theta',
+            is_archive_path=True,
+        )
         var_axis = None
         for axis in ['omega', 'chi', 'phi']:
-            axis_value = hdf5_handler.read_dataset(getattr(self, axis))
+            axis_value = hdf5_handler.read_dataset(
+                path=f'data.results[0].{axis}',
+                is_archive_path=True,
+            )
             if axis_value is not None and len(np.unique(axis_value.magnitude)) > 1:
                 var_axis = axis
                 break
+        if intensity is None or two_theta is None or var_axis is None:
+            return
 
-        if self.source_peak_wavelength is not None and var_axis is not None:
-            two_theta = hdf5_handler.read_dataset(self.two_theta)
-            intensity = hdf5_handler.read_dataset(self.intensity)
+        if self.source_peak_wavelength is not None:
             q_parallel, q_perpendicular = calculate_q_vectors_rsm(
                 wavelength=self.source_peak_wavelength,
                 two_theta=two_theta * np.ones_like(intensity),
-                omega=hdf5_handler.read_dataset(getattr(self, var_axis)),
+                omega=hdf5_handler.read_dataset(
+                    path=f'data.results[0].{var_axis}',
+                    is_archive_path=True,
+                ),
             )
             hdf5_handler.add_dataset(
                 path='/ENTRY[entry]/experiment_result/q_parallel',
@@ -985,19 +1051,11 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                     archive_path='data.results[0].q_perpendicular',
                 ),
             )
-            hdf5_handler.write_file()
             self.m_setdefault('plot_intensity_scattering_vector')
-            self.plot_intensity_scattering_vector.intensity = self.intensity
-            self.plot_intensity_scattering_vector.q_parallel = self.q_parallel
-            self.plot_intensity_scattering_vector.q_perpendicular = self.q_perpendicular
             self.plot_intensity_scattering_vector.normalize(archive, logger)
 
-        if var_axis is not None:
-            self.m_setdefault('plot_intensity')
-            self.plot_intensity.intensity = self.intensity
-            self.plot_intensity.two_theta = self.two_theta
-            self.plot_intensity.m_set(var_axis, getattr(self, var_axis))
-            self.plot_intensity.normalize(archive, logger)
+        self.m_setdefault('plot_intensity')
+        self.plot_intensity.normalize(archive, logger)
 
 
 class XRayDiffraction(Measurement):
@@ -1079,10 +1137,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         if not archive.results.properties.structural:
             diffraction_patterns = []
             for result in self.results:
-                intensity = hdf5_handler.read_dataset(result.intensity)
+                intensity = hdf5_handler.read_dataset(
+                    'data.results[0].intensity', is_archive_path=True
+                )
                 if len(intensity.shape) == 1:
-                    two_theta = hdf5_handler.read_dataset(result.two_theta)
-                    q_norm = hdf5_handler.read_dataset(result.q_norm)
+                    two_theta = hdf5_handler.read_dataset(
+                        'data.results[0].two_theta', is_archive_path=True
+                    )
+                    q_norm = hdf5_handler.read_dataset(
+                        'data.results[0].q_norm', is_archive_path=True
+                    )
                     diffraction_patterns.append(
                         DiffractionPattern(
                             incident_beam_wavelength=result.source_peak_wavelength,
@@ -1305,10 +1369,12 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                 with archive.m_context.raw_file(self.data_file) as file:
                     xrd_dict = read_function(file.name, logger)
                 write_function(xrd_dict, archive, logger)
-                self.hdf5_handler.write_file()
-                if self.hdf5_handler.data_file != self.auxiliary_file:
-                    self.auxiliary_file = self.hdf5_handler.data_file
 
+        super().normalize(archive, logger)
+
+        self.hdf5_handler.write_file()
+        if self.hdf5_handler.data_file != self.auxiliary_file:
+            self.auxiliary_file = self.hdf5_handler.data_file
         if archive.m_context.raw_path_exists(
             self.auxiliary_file
         ) and self.auxiliary_file.endswith('.nxs'):
@@ -1320,8 +1386,6 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             )
             self.nexus_results = f'{ref_to_nx_entry_data}'
 
-        super().normalize(archive, logger)
-
 
 class RawFileXRDData(EntryData):
     """

From 369dc72bee09dd23a4a35e0a04bedad78647463e Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Wed, 15 Jan 2025 11:33:03 +0100
Subject: [PATCH 24/41] Fix: set hard links for internal ref

---
 src/nomad_measurements/utils.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index f8672226..44fb6859 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -474,10 +474,11 @@ def _write_hdf5_file(self):  # noqa: PLR0912
             self.archive.m_context.raw_file(self.data_file, mode), 'a'
         ) as h5:
             for key, value in self._hdf5_datasets.items():
-                if value.data is None:
+                data = value.data
+                if data is None:
                     self.logger.warning(f'No data found for "{key}". Skipping.')
                     continue
-                elif value.internal_reference:
+                if value.internal_reference:
                     # resolve the internal reference
                     try:
                         data = h5[self._remove_nexus_annotations(value.data)]
@@ -486,15 +487,19 @@ def _write_hdf5_file(self):  # noqa: PLR0912
                             f'Internal reference "{value.data}" not found. Skipping.'
                         )
                         continue
-                else:
-                    data = value.data
 
                 group_name, dataset_name = key.rsplit('/', 1)
                 group = h5.require_group(group_name)
 
                 if key in h5:
-                    group[dataset_name][...] = data
+                    # remove the existing dataset if any
+                    del h5[key]
+
+                if value.internal_reference:
+                    # create a hard link to the existing dataset
+                    group[dataset_name] = data
                 else:
+                    # create the dataset
                     group.create_dataset(
                         name=dataset_name,
                         data=data,

From aa0277c384810d49fdb46c511d269a4782e46201 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Wed, 15 Jan 2025 11:56:20 +0100
Subject: [PATCH 25/41] remove del method; fix test

---
 src/nomad_measurements/utils.py | 4 ----
 tests/test_xrd.py               | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 44fb6859..6722e186 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -597,10 +597,6 @@ def _set_hdf5_reference(
         ):
             resolved_section.m_set(quantity_name, ref)
 
-    def __del__(self):
-        if self._hdf5_datasets or self._hdf5_attributes:
-            self.write_file()
-
 
 def resolve_path(section: 'ArchiveSection', path: str, logger: 'BoundLogger' = None):
     """
diff --git a/tests/test_xrd.py b/tests/test_xrd.py
index 124be398..4aa061d6 100644
--- a/tests/test_xrd.py
+++ b/tests/test_xrd.py
@@ -17,6 +17,7 @@
 #
 import pytest
 from nomad.client import normalize_all
+from nomad_measurements.xrd.schema import XRDResult1D
 
 test_files = [
     'tests/data/xrd/XRD-918-16_10.xrdml',
@@ -52,7 +53,7 @@ def test_normalize_all(parsed_measurement_archive, caplog):
     assert parsed_measurement_archive.data.results[
         0
     ].source_peak_wavelength.magnitude == pytest.approx(1.540598, 1e-2)
-    if len(parsed_measurement_archive.data.results[0].intensity.shape) == 1:
+    if isinstance(parsed_measurement_archive.data.results[0], XRDResult1D):
         assert (
             parsed_measurement_archive.results.properties.structural.diffraction_pattern[
                 0

From a71db65b9e70292858798d39046f819dc42f29c9 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 16 Jan 2025 11:35:30 +0100
Subject: [PATCH 26/41] Combine nexus dataset map

---
 src/nomad_measurements/utils.py      | 23 ++++++++--------
 src/nomad_measurements/xrd/nx.py     | 41 ++++++++++++----------------
 src/nomad_measurements/xrd/schema.py |  5 ++--
 3 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 6722e186..06ea941b 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -38,8 +38,6 @@
 from pynxtools.dataconverter.template import Template
 from pynxtools.dataconverter.writer import Writer as pynxtools_writer
 
-from nomad_measurements.xrd.nx import CONCEPT_MAP
-
 if TYPE_CHECKING:
     from nomad.datamodel.data import (
         ArchiveSection,
@@ -217,8 +215,7 @@ def __init__(
         filename: str,
         archive: 'EntryArchive',
         logger: 'BoundLogger',
-        valid_dataset_paths: list = None,
-        nexus: bool = False,
+        nexus_dataset_map: dict = None,
     ):
         """
         Initialize the handler.
@@ -227,8 +224,8 @@ def __init__(
             filename (str): The name of the auxiliary file.
             archive (EntryArchive): The NOMAD archive.
             logger (BoundLogger): A structlog logger.
-            valid_dataset_paths (list): The list of valid dataset paths.
-            nexus (bool): If True, the file is created as a NeXus file.
+            nexus_dataset_map (dict): The NeXus dataset map containing the nexus file
+                dataset paths and the corresponding archive paths.
         """
         if not filename.endswith(('.nxs', '.h5')):
             raise ValueError('Only .h5 or .nxs files are supported.')
@@ -236,10 +233,12 @@ def __init__(
         self.data_file = filename
         self.archive = archive
         self.logger = logger
-        self.valid_dataset_paths = []
-        if valid_dataset_paths:
-            self.valid_dataset_paths = valid_dataset_paths
-        self.nexus = nexus
+
+        self.nexus = True if nexus_dataset_map else False
+        self.nexus_dataset_map = nexus_dataset_map
+        self.valid_dataset_paths = (
+            list(nexus_dataset_map.keys()) if nexus_dataset_map else []
+        )
 
         self._hdf5_datasets = collections.OrderedDict()
         self._hdf5_attributes = collections.OrderedDict()
@@ -519,7 +518,9 @@ def _write_hdf5_file(self):  # noqa: PLR0912
     def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
         """Construct datasets and attributes for nexus and populate."""
 
-        for nx_path, arch_path in CONCEPT_MAP.items():
+        for nx_path, arch_path in self.nexus_dataset_map.items():
+            if nx_path in self._hdf5_datasets or nx_path in self._hdf5_attributes:
+                continue
             if arch_path.startswith('archive.'):
                 data = resolve_path(self.archive, arch_path.split('archive.', 1)[1])
             else:
diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
index 21474aea..568b60dd 100644
--- a/src/nomad_measurements/xrd/nx.py
+++ b/src/nomad_measurements/xrd/nx.py
@@ -16,35 +16,30 @@
 # limitations under the License.
 #
 
-NEXUS_DATASET_PATHS = [
-    '/ENTRY[entry]/experiment_result/intensity',
-    '/ENTRY[entry]/experiment_result/two_theta',
-    '/ENTRY[entry]/experiment_result/omega',
-    '/ENTRY[entry]/experiment_result/chi',
-    '/ENTRY[entry]/experiment_result/phi',
-    '/ENTRY[entry]/experiment_config/count_time',
-    '/ENTRY[entry]/experiment_result/q_norm',
-    '/ENTRY[entry]/experiment_result/q_parallel',
-    '/ENTRY[entry]/experiment_result/q_perpendicular',
-    '/ENTRY[entry]/method',
-    '/ENTRY[entry]/measurement_type',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
-    '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
-]
+"""
+The following connects the nexus file paths to the archive paths.
+The nexus file paths come from the nexus_definitions available at:
+https://github.com/FAIRmat-NFDI/nexus_definitions/ in the following file:
+`contributed_definitions/NXxrd_pan.nxdl.xml`.
+The archive paths are the paths in the NOMAD archive defined in the class:
+`nomad_measurements.xrd.schema.ELNXRayDiffraction`.
+"""
 
-
-CONCEPT_MAP = {
+NEXUS_DATASET_MAP = {
     '/ENTRY[entry]/@default': 'experiment_result',
     '/ENTRY[entry]/definition': 'NXxrd_pan',
+    '/ENTRY[entry]/experiment_result/intensity': 'archive.data.results[0].intensity',
+    '/ENTRY[entry]/experiment_result/two_theta': 'archive.data.results[0].two_theta',
+    '/ENTRY[entry]/experiment_result/omega': 'archive.data.results[0].omega',
+    '/ENTRY[entry]/experiment_result/chi': 'archive.data.results[0].chi',
+    '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi',
+    '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm',
+    '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel',
+    '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular',
     '/ENTRY[entry]/method': 'archive.data.method',
     '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
     '/ENTRY[entry]/experiment_result/@signal': 'intensity',
+    '/ENTRY[entry]/experiment_config/count_time': 'archive.data.results[0].count_time',
     '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': (
         'archive.data.results[0].scan_axis'
     ),
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 5b1089d8..40e106bf 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -78,7 +78,7 @@
     get_reference,
     merge_sections,
 )
-from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS
+from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP
 
 if TYPE_CHECKING:
     from nomad.datamodel.datamodel import (
@@ -1357,8 +1357,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
                 filename=self.auxiliary_file,
                 archive=archive,
                 logger=logger,
-                valid_dataset_paths=NEXUS_DATASET_PATHS,
-                nexus=True,
+                nexus_dataset_map=NEXUS_DATASET_MAP,
             )
             read_function, write_function = self.get_read_write_functions()
             if read_function is None or write_function is None:

From 0fed11ac79626cbec92e9c37c2b6a61404800c4b Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 16 Jan 2025 11:43:42 +0100
Subject: [PATCH 27/41] Ruff

---
 src/nomad_measurements/xrd/nx.py | 4 +++-
 tests/test_xrd.py                | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py
index 568b60dd..9b0d6944 100644
--- a/src/nomad_measurements/xrd/nx.py
+++ b/src/nomad_measurements/xrd/nx.py
@@ -35,7 +35,9 @@
     '/ENTRY[entry]/experiment_result/phi': 'archive.data.results[0].phi',
     '/ENTRY[entry]/experiment_result/q_norm': 'archive.data.results[0].q_norm',
     '/ENTRY[entry]/experiment_result/q_parallel': 'archive.data.results[0].q_parallel',
-    '/ENTRY[entry]/experiment_result/q_perpendicular': 'archive.data.results[0].q_perpendicular',
+    '/ENTRY[entry]/experiment_result/q_perpendicular': (
+        'archive.data.results[0].q_perpendicular'
+    ),
     '/ENTRY[entry]/method': 'archive.data.method',
     '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name',
     '/ENTRY[entry]/experiment_result/@signal': 'intensity',
diff --git a/tests/test_xrd.py b/tests/test_xrd.py
index 4aa061d6..5ebede27 100644
--- a/tests/test_xrd.py
+++ b/tests/test_xrd.py
@@ -17,6 +17,7 @@
 #
 import pytest
 from nomad.client import normalize_all
+
 from nomad_measurements.xrd.schema import XRDResult1D
 
 test_files = [

From 0a055cc3c35300d1deb3af0ebee9ab32f0bbdb0b Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 16 Jan 2025 17:21:44 +0100
Subject: [PATCH 28/41] Make Auxiliary file name without raw file ext

---
 src/nomad_measurements/xrd/schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 40e106bf..5501113a 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -1352,7 +1352,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         """
         self.backward_compatibility()
         if self.data_file is not None:
-            self.auxiliary_file = f'{self.data_file}.nxs'
+            self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs'
             self.hdf5_handler = HDF5Handler(
                 filename=self.auxiliary_file,
                 archive=archive,

From 22662372b2b19e8e8593a7d1b1a907326c269c46 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 16 Jan 2025 17:22:35 +0100
Subject: [PATCH 29/41] Add cleanup extensions for fixture

---
 tests/conftest.py          | 13 ++++++++++---
 tests/test_transmission.py |  9 +++++++--
 tests/test_xrd.py          | 10 +++++++++-
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 975b2f47..e6b40201 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -66,8 +66,11 @@ def fixture_parsed_measurement_archive(request):
     file created by plugin parsers for the measurement data. Parsing this
     `.archive.json` file returns the `EntryArchive` object for the measurement data,
     which is finally yeilded to the test function.
+    request.param[0] is the relative path to the data file.
+    request.param[1] is a list of file extensions that need to be cleaned up after
+    the test.
     """
-    rel_file_path = request.param
+    rel_file_path = request.param[0]
     file_archive = parse(rel_file_path)[0]
 
     rel_measurement_archive_path = os.path.join(
@@ -79,5 +82,9 @@ def fixture_parsed_measurement_archive(request):
 
     yield parse(rel_measurement_archive_path)[0]
 
-    if os.path.exists(rel_measurement_archive_path):
-        os.remove(rel_measurement_archive_path)
+    # clean up
+    clean_up_extensions = request.param[1]
+    for ext in clean_up_extensions:
+        path = os.path.join(rel_file_path.rsplit('.', 1)[0] + ext)
+        if os.path.exists(path):
+            os.remove(path)
diff --git a/tests/test_transmission.py b/tests/test_transmission.py
index a22e65d6..f9e86330 100644
--- a/tests/test_transmission.py
+++ b/tests/test_transmission.py
@@ -26,11 +26,16 @@
     'tests/data/transmission/sphere_test01.Probe.Raw.asc',
 ]
 log_levels = ['error', 'critical']
+clean_up_extensions = ['.archive.json']
 
 
 @pytest.mark.parametrize(
     'parsed_measurement_archive, caplog',
-    [(file, log_level) for file in test_files for log_level in log_levels],
+    [
+        ((file, clean_up_extensions), log_level)
+        for file in test_files
+        for log_level in log_levels
+    ],
     indirect=True,
 )
 def test_normalize_all(parsed_measurement_archive, caplog):
@@ -46,7 +51,7 @@ def test_normalize_all(parsed_measurement_archive, caplog):
 
 @pytest.mark.parametrize(
     'parsed_measurement_archive, caplog',
-    [(test_files[0], log_level) for log_level in log_levels],
+    [((test_files[0], clean_up_extensions), log_level) for log_level in log_levels],
     indirect=True,
 )
 def test_normalized_data(parsed_measurement_archive, caplog):
diff --git a/tests/test_xrd.py b/tests/test_xrd.py
index 5ebede27..6b65d69e 100644
--- a/tests/test_xrd.py
+++ b/tests/test_xrd.py
@@ -31,11 +31,19 @@
     'tests/data/xrd/TwoTheta_scan_powder.rasx',
 ]
 log_levels = ['error', 'critical']
+clean_up_extensions = ['.archive.json', '.nxs', '.h5']
 
 
 @pytest.mark.parametrize(
     'parsed_measurement_archive, caplog',
-    [(file, log_level) for file in test_files for log_level in log_levels],
+    [
+        (
+            (file, clean_up_extensions),
+            log_level,
+        )
+        for file in test_files
+        for log_level in log_levels
+    ],
     indirect=True,
 )
 def test_normalize_all(parsed_measurement_archive, caplog):

From f484fd8fb898f2254c548cb3fd83448738e4598c Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 16 Jan 2025 18:13:04 +0100
Subject: [PATCH 30/41] Use bool in ELN to control raw file update

---
 src/nomad_measurements/xrd/schema.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 5501113a..e54c5679 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -1196,6 +1196,14 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
             component=ELNComponentEnum.FileEditQuantity,
         ),
     )
+    overwrite_auxiliary_file = Quantity(
+        type=bool,
+        default=True,
+        description='Overwrite the auxiliary file with the current data.',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.BoolEditQuantity,
+        ),
+    )
     hdf5_handler = None
     measurement_identifiers = SubSection(
         section_def=ReadableIdentifiers,
@@ -1371,12 +1379,14 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
 
         super().normalize(archive, logger)
 
-        self.hdf5_handler.write_file()
+        if self.overwrite_auxiliary_file:
+            self.hdf5_handler.write_file()
+            self.overwrite_auxiliary_file = False
         if self.hdf5_handler.data_file != self.auxiliary_file:
             self.auxiliary_file = self.hdf5_handler.data_file
-        if archive.m_context.raw_path_exists(
-            self.auxiliary_file
-        ) and self.auxiliary_file.endswith('.nxs'):
+
+        self.nexus_results = None
+        if self.auxiliary_file.endswith('.nxs'):
             nx_entry_id = get_entry_id_from_file_name(
                 archive=archive, file_name=self.auxiliary_file
             )

From 05df712901de395267f2c4fbf8980709eac6f0af Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Mon, 20 Jan 2025 14:36:05 +0100
Subject: [PATCH 31/41] Remove 'file and entry deletion'

---
 src/nomad_measurements/utils.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 06ea941b..ba578bd5 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -377,10 +377,6 @@ def write_file(self):
                     f"""NeXusFileGenerationError: Encountered '{e}' error while creating
                     nexus file. Creating h5 file instead."""
                 )
-                if self.archive.m_context.raw_path_exists(self.data_file):
-                    os.remove(
-                        os.path.join(self.archive.m_context.raw_path(), self.data_file)
-                    )
                 self._write_hdf5_file()
         else:
             self._write_hdf5_file()
@@ -436,8 +432,6 @@ def _write_nx_file(self):
             self.archive.m_context.raw_path(), self.data_file
         )
 
-        if self.archive.m_context.raw_path_exists(self.data_file):
-            os.remove(nx_full_file_path)
         pynxtools_writer(
             data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path
         ).write()

From c6c891d225e05fe89e97d5a0efbf979721961217 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Mon, 20 Jan 2025 14:43:37 +0100
Subject: [PATCH 32/41] remove defaults: trigger write if file is missing

---
 src/nomad_measurements/xrd/schema.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index e54c5679..e1cc8afe 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -1198,7 +1198,6 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
     )
     overwrite_auxiliary_file = Quantity(
         type=bool,
-        default=True,
         description='Overwrite the auxiliary file with the current data.',
         a_eln=ELNAnnotation(
             component=ELNComponentEnum.BoolEditQuantity,
@@ -1379,11 +1378,13 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
 
         super().normalize(archive, logger)
 
-        if self.overwrite_auxiliary_file:
+        if self.overwrite_auxiliary_file or not archive.m_context.raw_path_exists(
+            self.auxiliary_file
+        ):
             self.hdf5_handler.write_file()
             self.overwrite_auxiliary_file = False
-        if self.hdf5_handler.data_file != self.auxiliary_file:
-            self.auxiliary_file = self.hdf5_handler.data_file
+            if self.hdf5_handler.data_file != self.auxiliary_file:
+                self.auxiliary_file = self.hdf5_handler.data_file
 
         self.nexus_results = None
         if self.auxiliary_file.endswith('.nxs'):

From 75374660f58e8c4dad76caf403b4e5bb250217a6 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Mon, 20 Jan 2025 14:44:23 +0100
Subject: [PATCH 33/41] Minor

---
 src/nomad_measurements/xrd/schema.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index e1cc8afe..f9f63be3 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -1132,7 +1132,8 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
 
         try:
             hdf5_handler = self.hdf5_handler
-        except AttributeError:
+            assert isinstance(hdf5_handler, HDF5Handler)
+        except (AttributeError, AssertionError):
             return
         if not archive.results.properties.structural:
             diffraction_patterns = []
@@ -1203,7 +1204,11 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
             component=ELNComponentEnum.BoolEditQuantity,
         ),
     )
-    hdf5_handler = None
+    nexus_results = Quantity(
+        type=ArchiveSection,
+        description='Reference to the NeXus entry.',
+        a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity),
+    )
     measurement_identifiers = SubSection(
         section_def=ReadableIdentifiers,
     )
@@ -1211,11 +1216,7 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData):
     diffraction_method_name.m_annotations['eln'] = ELNAnnotation(
         component=ELNComponentEnum.EnumEditQuantity,
     )
-    nexus_results = Quantity(
-        type=ArchiveSection,
-        description='Reference to the NeXus entry.',
-        a_eln=ELNAnnotation(component=ELNComponentEnum.ReferenceEditQuantity),
-    )
+    hdf5_handler = None
 
     def get_read_write_functions(self) -> tuple[Callable, Callable]:
         """

From 5fc0a8d886d5f1a1b860d473915a07a705cde6f0 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 21 Jan 2025 10:40:01 +0100
Subject: [PATCH 34/41] Set hdf5 references at add_dataset stage

---
 src/nomad_measurements/utils.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index ba578bd5..c64b237c 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -296,6 +296,12 @@ def add_dataset(
         self._hdf5_datasets[path] = dataset
         if dataset.archive_path:
             self._hdf5_path_map[dataset.archive_path] = path
+            self._set_hdf5_reference(
+                self.archive,
+                dataset.archive_path,
+                f'/uploads/{self.archive.m_context.upload_id}/raw'
+                f'/{self.data_file}#{self._remove_nexus_annotations(path)}',
+            )
 
     def add_attribute(
         self,
@@ -411,13 +417,6 @@ def _write_nx_file(self):
             except KeyError:
                 template['optional'][nx_path] = dset.data
 
-            hdf5_path = self._remove_nexus_annotations(nx_path)
-            self._set_hdf5_reference(
-                self.archive,
-                dset.archive_path,
-                f'/uploads/{self.archive.m_context.upload_id}/raw'
-                f'/{self.data_file}#{hdf5_path}',
-            )
         for nx_path, attr_d in list(self._hdf5_attributes.items()) + list(
             attr_dict.items()
         ):
@@ -497,12 +496,6 @@ def _write_hdf5_file(self):  # noqa: PLR0912
                         name=dataset_name,
                         data=data,
                     )
-                self._set_hdf5_reference(
-                    self.archive,
-                    value.archive_path,
-                    f'/uploads/{self.archive.m_context.upload_id}/raw'
-                    f'/{self.data_file}#{key}',
-                )
             for key, value in self._hdf5_attributes.items():
                 if key in h5:
                     h5[key].attrs.update(value)

From 49c6f41f13947a54bb35f141815e07f35bc9de03 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Tue, 21 Jan 2025 10:53:35 +0100
Subject: [PATCH 35/41] Reset on trigger for main branch PR only

---
 .github/workflows/python-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
index 49d80603..ece0b411 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-test.yml
@@ -7,7 +7,7 @@ on:
   push:
     branches: [ "main" ]
   pull_request:
-    branches: [ "*" ]
+    branches: [ "main" ]
 
 permissions:
   contents: read

From af89f3b7fa7fd7fa53a8942b4d02c4e0b683c73e Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 23 Jan 2025 16:05:56 +0100
Subject: [PATCH 36/41] abstract out set hdf5 ref

---
 src/nomad_measurements/utils.py      | 23 ++++++++++++++++-------
 src/nomad_measurements/xrd/schema.py |  2 ++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index c64b237c..3ca87437 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -296,12 +296,6 @@ def add_dataset(
         self._hdf5_datasets[path] = dataset
         if dataset.archive_path:
             self._hdf5_path_map[dataset.archive_path] = path
-            self._set_hdf5_reference(
-                self.archive,
-                dataset.archive_path,
-                f'/uploads/{self.archive.m_context.upload_id}/raw'
-                f'/{self.data_file}#{self._remove_nexus_annotations(path)}',
-            )
 
     def add_attribute(
         self,
@@ -387,6 +381,8 @@ def write_file(self):
         else:
             self._write_hdf5_file()
 
+        self.set_hdf5_references()
+
     def _write_nx_file(self):
         """
         Method for creating a NeXus file. Additional data from the archive is added
@@ -502,6 +498,20 @@ def _write_hdf5_file(self):  # noqa: PLR0912
                 else:
                     self.logger.warning(f'Path "{key}" not found to add attribute.')
 
+    def set_hdf5_references(self):
+        """
+        Method for adding the HDF5 references to the archive quantities.
+        """
+        for key, value in self._hdf5_datasets.items():
+            if value.archive_path:
+                reference = self._remove_nexus_annotations(key)
+                self._set_hdf5_reference(
+                    self.archive,
+                    value.archive_path,
+                    f'/uploads/{self.archive.m_context.upload_id}/raw'
+                    f'/{self.data_file}#{reference}',
+                )
+
     def populate_nx_dataset_and_attribute(self, attr_dict: dict, dataset_dict: dict):
         """Construct datasets and attributes for nexus and populate."""
 
@@ -554,7 +564,6 @@ def _remove_nexus_annotations(path: str) -> str:
                 new_path += '/' + part.split('[')[0].strip().lower()
             else:
                 new_path += '/' + part
-        new_path = new_path.replace('.nxs', '.h5')
         return new_path
 
     @staticmethod
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index f9f63be3..5c2dd353 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -1386,6 +1386,8 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             self.overwrite_auxiliary_file = False
             if self.hdf5_handler.data_file != self.auxiliary_file:
                 self.auxiliary_file = self.hdf5_handler.data_file
+        else:
+            self.hdf5_handler.set_hdf5_references()
 
         self.nexus_results = None
         if self.auxiliary_file.endswith('.nxs'):

From 965a91aa8ede5df1bc49b042d260efb4680a81a0 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 23 Jan 2025 16:09:26 +0100
Subject: [PATCH 37/41] Comment out nexus, TODOs, docstrings

---
 src/nomad_measurements/xrd/schema.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index 5c2dd353..f2ece071 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -273,6 +273,12 @@ class XRDSettings(ArchiveSection):
 
 
 class XRDResultPlotIntensity(ArchiveSection):
+    """
+    Section for plotting the intensity over 2-theta. A separate sub-section allows
+    creating a separate group in the `.h5` file. Attributes are added to the group to
+    generate the plot.
+    """
+
     m_def = Section(
         a_h5web=H5WebAnnotation(
             axes=['two_theta', 'omega', 'phi', 'chi'], signal='intensity'
@@ -364,6 +370,12 @@ def normalize(self, archive, logger):
 
 
 class XRDResultPlotIntensityScatteringVector(ArchiveSection):
+    """
+    Section for plotting the intensity over scattering vector. A separate sub-section
+    allows creating a separate group in the `.h5` file. Attributes are added to the
+    group to generate the plot.
+    """
+
     m_def = Section(
         a_h5web=H5WebAnnotation(
             axes=['q_parallel', 'q_perpendicular', 'q_norm'], signal='intensity'
@@ -1360,12 +1372,17 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         """
         self.backward_compatibility()
         if self.data_file is not None:
-            self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs'
+            # TODO (ka-sarthak): use .nxs file once updating the flag through the
+            # normalizer works.
+            # self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.nxs'
+            self.auxiliary_file = f'{self.data_file.rsplit(".", 1)[0]}.h5'
             self.hdf5_handler = HDF5Handler(
                 filename=self.auxiliary_file,
                 archive=archive,
                 logger=logger,
-                nexus_dataset_map=NEXUS_DATASET_MAP,
+                # TODO (ka-sarthak): use nexus dataset map once updating the flag
+                # through the normalizer works.
+                # nexus_dataset_map=NEXUS_DATASET_MAP,
             )
             read_function, write_function = self.get_read_write_functions()
             if read_function is None or write_function is None:
@@ -1383,9 +1400,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             self.auxiliary_file
         ):
             self.hdf5_handler.write_file()
-            self.overwrite_auxiliary_file = False
             if self.hdf5_handler.data_file != self.auxiliary_file:
                 self.auxiliary_file = self.hdf5_handler.data_file
+            # TODO (ka-sarthak): update the flag through the normalizer once it works.
+            # self.overwrite_auxiliary_file = False
         else:
             self.hdf5_handler.set_hdf5_references()
 

From 5308370947fd0f9043cdc6e8fd6a37a631575f84 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 23 Jan 2025 16:58:09 +0100
Subject: [PATCH 38/41] Reprocess nxs entry

---
 src/nomad_measurements/utils.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 3ca87437..5f51ab31 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -389,7 +389,6 @@ def _write_nx_file(self):
         to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus
         view of the data in addition to storing array data.
         """
-        from nomad.processing.data import Entry
 
         app_def = 'NXxrd_pan'
         nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def)
@@ -430,12 +429,9 @@ def _write_nx_file(self):
         pynxtools_writer(
             data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path
         ).write()
-
-        entry_list = Entry.objects(
-            upload_id=self.archive.m_context.upload_id, mainfile=self.data_file
+        self.archive.m_context.process_updated_raw_file(
+            self.data_file, allow_modify=True
         )
-        if not entry_list:
-            self.archive.m_context.process_updated_raw_file(self.data_file)
 
     def _write_hdf5_file(self):  # noqa: PLR0912
         """

From 1682c9413c2e3fd1ba190a904e365e7cfa81d971 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 23 Jan 2025 17:35:47 +0100
Subject: [PATCH 39/41] Handle missing dataset in add step

---
 src/nomad_measurements/utils.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 5f51ab31..ad952ed1 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -269,18 +269,21 @@ def add_dataset(
             validate_path (bool): If True, the dataset path is validated.
         """
         if not params:
-            self.logger.warning('Dataset `params` not provided.')
+            self.logger.warning(f'No params provided for path "{path}". Skipping.')
             return
 
         dataset = DatasetModel(
             **params,
         )
+        if dataset.data is None:
+            self.logger.warning(f'No data provided for the path "{path}". Skipping.')
+            return
         if (
             validate_path
             and self.valid_dataset_paths
             and path not in self.valid_dataset_paths
         ):
-            self.logger.warning(f'Invalid dataset path "{path}".')
+            self.logger.warning(f'Invalid dataset path "{path}". Skipping.')
             return
 
         # handle the pint.Quantity and add data
@@ -311,7 +314,7 @@ def add_attribute(
             params (dict): The attributes to be added.
         """
         if not params:
-            self.logger.warning('Attribute `params` not provided.')
+            self.logger.warning(f'No params provided for attribute {path}.')
             return
         self._hdf5_attributes[path] = params
 
@@ -459,9 +462,6 @@ def _write_hdf5_file(self):  # noqa: PLR0912
         ) as h5:
             for key, value in self._hdf5_datasets.items():
                 data = value.data
-                if data is None:
-                    self.logger.warning(f'No data found for "{key}". Skipping.')
-                    continue
                 if value.internal_reference:
                     # resolve the internal reference
                     try:

From 96bdf07621f3e2f36467e5d2e1433755585623af Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Thu, 23 Jan 2025 17:51:02 +0100
Subject: [PATCH 40/41] Comment out import

---
 src/nomad_measurements/xrd/schema.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index f2ece071..df2e7e67 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -78,7 +78,8 @@
     get_reference,
     merge_sections,
 )
-from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP
+
+# from nomad_measurements.xrd.nx import NEXUS_DATASET_MAP
 
 if TYPE_CHECKING:
     from nomad.datamodel.datamodel import (

From ab1081dccd788bd79d5bffcb1f16029bc65d7d34 Mon Sep 17 00:00:00 2001
From: Sarthak Kapoor <sarthakkapoor@hotmail.com>
Date: Fri, 24 Jan 2025 14:28:01 +0100
Subject: [PATCH 41/41] Review: sourcery

---
 src/nomad_measurements/utils.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index ad952ed1..ab4def11 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -234,7 +234,7 @@ def __init__(
         self.archive = archive
         self.logger = logger
 
-        self.nexus = True if nexus_dataset_map else False
+        self.nexus = bool(nexus_dataset_map)
         self.nexus_dataset_map = nexus_dataset_map
         self.valid_dataset_paths = (
             list(nexus_dataset_map.keys()) if nexus_dataset_map else []
@@ -344,8 +344,7 @@ def read_dataset(self, path: str, is_archive_path: bool = False):
         if dataset_path in self._hdf5_datasets:
             value = self._hdf5_datasets[dataset_path].data
             if dataset_path in self._hdf5_attributes:
-                units = self._hdf5_attributes[dataset_path].get('units')
-                if units:
+                if units := self._hdf5_attributes[dataset_path].get('units'):
                     value *= ureg(units)
             return value
 
@@ -554,13 +553,14 @@ def _remove_nexus_annotations(path: str) -> str:
             return path
 
         pattern = r'.*\[.*\]'
-        new_path = ''
-        for part in path.split('/')[1:]:
-            if re.match(pattern, part):
-                new_path += '/' + part.split('[')[0].strip().lower()
-            else:
-                new_path += '/' + part
-        return new_path
+        return ''.join(
+            (
+                '/' + part.split('[')[0].strip().lower()
+                if re.match(pattern, part)
+                else f'/{part}'
+            )
+            for part in path.split('/')[1:]
+        )
 
     @staticmethod
     def _set_hdf5_reference(