From 29f1ac9c734a01e75eab579c4a31730f084900eb Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Fri, 23 Aug 2024 16:06:25 +0200
Subject: [PATCH 01/10] save current state

---
 .gitmodules                           |   2 +-
 src/pynxtools/dataconverter/writer.py | 148 ++++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 71907ead7..ed00dea93 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "src/pynxtools/definitions"]
 	path = src/pynxtools/definitions
-	url = https://github.com/FAIRmat-NFDI/nexus_definitions.git
\ No newline at end of file
+	url = https://github.com/FAIRmat-NFDI/nexus_definitions.git
diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index d22307c88..467554173 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -237,6 +237,134 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
 
         return elem.attrib
 
+    def get_nxdl_docs(self, path: str = "/", attr: bool = False) -> dict:
+        """Get the NXDL docs for a path in the data."""
+
+        def extract_and_format_docs(elem: ET.Element) -> str:
+            """Get the docstring for a given element in the NDXL tree."""
+            docs = elem.findall(f"{self.nxs_namespace}doc")
+            if docs:
+                return docs[0].text.strip().replace("\\n", "\n")
+            return ""
+
+        nxdl_path = helpers.convert_data_converter_dict_to_nxdl_path(path)
+
+        try:
+            elem = get_node_at_nxdl_path(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
+        except NxdlAttributeNotFoundError:
+            return None
+
+        if not attr:
+            return extract_and_format_docs(elem)
+        else:
+            from pynxtools.definitions.dev_tools.utils.nxdl_utils import (
+                get_inherited_nodes,
+            )
+
+            (class_path, nxdlpath, elist) = get_inherited_nodes(
+                nxdl_path, elem=copy.deepcopy(self.nxdl_data)
+            )
+            print(class_path, nxdlpath, elist)
+            if "data/@signal" in path:
+                print((class_path, nxdlpath, elist))
+                elem = get_node_at_nxdl_path(
+                    nxdl_path, elem=copy.deepcopy(self.nxdl_data)
+                )
+                print(path, nxdl_path, elem)
+
+            # return get_nxdl_attr_doc(elem)
+
+        # def append_docs(elem, doc: str):
+        #     """Append docs to existing docs."""
+        #     if isinstance(elem, (h5py.Group, h5py.Dataset)):
+        #         existing_doc = elem.attrs.get("doc", "")
+        #     # if isinstance(elem, (h5py.Attribute)):
+        #     #     existing_doc = str(elem)
+
+        #     if doc:
+        #         if isinstance(existing_doc, str):
+        #             return existing_doc + doc
+        #         else:
+        #             return doc
+
+    # def get_nxdl_attr_doc(  # pylint: disable=too-many-arguments,too-many-locals
+    #     elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info
+    # ):
+    #     """Get nxdl documentation for an attribute"""
+    #     new_elem = []
+    #     old_elem = elem
+    #     attr_inheritance_chain = []
+    #     for elem_index, act_elem1 in enumerate(elist):
+    #         act_elem = act_elem1
+    #         # NX_class is a compulsory attribute for groups in a nexus file
+    #         # which should match the type of the corresponding NXDL element
+    #         if (
+    #             attr == "NX_class"
+    #             and not isinstance(hdf_node, h5py.Dataset)
+    #             and elem_index == 0
+    #         ):
+    #             elem = None
+    #             logger, doc, attr = write_doc_string(logger, doc, attr)
+    #             new_elem = elem
+    #             break
+    #         # units category is a compulsory attribute for any fields
+    #         if attr == "units" and isinstance(hdf_node, h5py.Dataset):
+    #             req_str = "<<REQUIRED>>"
+    #             logger, act_elem, attr_inheritance_chain, doc, attr = try_find_units(
+    #                 logger, act_elem, attr_inheritance_chain, doc, attr
+    #             )
+    #         # units for attributes can be given as ATTRIBUTENAME_units
+    #         elif attr.endswith("_units"):
+    #             logger, act_elem, attr_inheritance_chain, doc, attr, req_str = (
+    #                 check_attr_name_nxdl(
+    #                     (logger, act_elem, attr_inheritance_chain, doc, attr, req_str)
+    #                 )
+    #             )
+    #         # default is allowed for groups
+    #         elif attr == "default" and not isinstance(hdf_node, h5py.Dataset):
+    #             req_str = "<<RECOMMENDED>>"
+    #             # try to find if default is defined as a child of the NXDL element
+    #             act_elem = get_nxdl_child(
+    #                 act_elem, attr, nexus_type="attribute", go_base=False
+    #             )
+    #             logger, act_elem, attr_inheritance_chain, doc, attr = try_find_default(
+    #                 logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr
+    #             )
+    #         else:  # other attributes
+    #             act_elem = get_nxdl_child(
+    #                 act_elem, attr, nexus_type="attribute", go_base=False
+    #             )
+    #             if act_elem is not None:
+    #                 logger, act_elem, attr_inheritance_chain, doc, attr = other_attrs(
+    #                     logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr
+    #                 )
+    #         if act_elem is not None:
+    #             new_elem.append(act_elem)
+    #             if req_str is None:
+    #                 req_str = get_required_string(act_elem)  # check for being required
+    #                 if doc:
+    #                     logger.debug(req_str)
+    #             variables = [logger, act_elem, path]
+    #             (
+    #                 logger,
+    #                 elem,
+    #                 path,
+    #                 doc,
+    #                 elist,
+    #                 attr,
+    #                 hdf_node,
+    #             ) = check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node)
+    #     elem = old_elem
+    #     if req_str is None and doc:
+    #         if attr != "NX_class":
+    #             logger.debug("@" + attr + " - IS NOT IN SCHEMA")
+    #         logger.debug("")
+
+    #     # Add the lowest child element to the nxdl_path
+    #     if attr_inheritance_chain:
+    #         nxdl_path.append(attr_inheritance_chain[0])
+    #     return (req_str, get_nxdl_entry(hdf_info), nxdl_path)
+
     def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Group:
         """Returns the parent if it exists for a given path else creates the parent group."""
         parent_path = path[0 : path.rindex("/")] or "/"
@@ -249,6 +377,11 @@ def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Grou
 
             if attrs is not None:
                 grp.attrs["NX_class"] = attrs["type"]
+
+            docs = self.get_nxdl_docs(parent_path)
+            if docs:
+                grp.attrs["doc"] = docs
+
             return grp
         return self.output_nexus[parent_path_hdf5]
 
@@ -263,6 +396,11 @@ def add_units_key(dataset, path):
                 dataset.attrs["units"] = self.data[units_key]
 
         for path, value in self.data.items():
+            if path.split("/")[-1][0] == "@":
+                docs = self.get_nxdl_docs(path, attr=True)
+            else:
+                docs = self.get_nxdl_docs(path)
+
             try:
                 if path[path.rindex("/") + 1 :] == "@units":
                     continue
@@ -279,17 +417,24 @@ def add_units_key(dataset, path):
                     grp = self.ensure_and_get_parent_node(
                         path, self.data.undocumented.keys()
                     )
+
                     if isinstance(data, dict):
                         if "compress" in data.keys():
                             dataset = handle_dicts_entries(
                                 data, grp, entry_name, self.output_path, path
                             )
+                            if docs:
+                                dataset.attrs["doc"] = docs
+
                         else:
                             hdf5_links_for_later.append(
                                 [data, grp, entry_name, self.output_path, path]
                             )
                     else:
                         dataset = grp.create_dataset(entry_name, data=data)
+                        if docs:
+                            dataset.attrs["doc"] = docs
+
             except InvalidDictProvided as exc:
                 print(str(exc))
             except Exception as exc:
@@ -305,6 +450,7 @@ def add_units_key(dataset, path):
                 del self.data[links[-1]]
 
         for path, value in self.data.items():
+            docs = self.get_nxdl_docs(path)
             try:
                 if path[path.rindex("/") + 1 :] == "@units":
                     continue
@@ -327,6 +473,8 @@ def add_units_key(dataset, path):
                         path, self.data.undocumented.keys()
                     )
                     dataset.attrs[entry_name[1:]] = data
+                    if docs:
+                        dataset.attrs["doc"] = docs
             except Exception as exc:
                 raise IOError(
                     f"Unknown error occured writing the path: {path} "

From 24c3b4138286db687e067eacad62cdc7198fa78a Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 29 Aug 2024 16:09:51 +0200
Subject: [PATCH 02/10] fix everything except for attribute doc retrieval

---
 src/pynxtools/data/NXtest.nxdl.xml     |   1 +
 src/pynxtools/dataconverter/convert.py |  15 +-
 src/pynxtools/dataconverter/writer.py  | 195 ++++++++-----------------
 tests/dataconverter/test_writer.py     |  20 +++
 4 files changed, 93 insertions(+), 138 deletions(-)

diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml
index 8695a20c9..01e39214d 100644
--- a/src/pynxtools/data/NXtest.nxdl.xml
+++ b/src/pynxtools/data/NXtest.nxdl.xml
@@ -14,6 +14,7 @@
         <field name="definition">
             <doc>This is a dummy NXDL to test out the dataconverter.</doc>
             <attribute name="version"/>
+                <doc>This is the version of the definition.</doc>
             <enumeration>
                 <item value="NXTEST"/>
                 <item value="NXtest"/>
diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py
index 508071906..0ca50fc44 100644
--- a/src/pynxtools/dataconverter/convert.py
+++ b/src/pynxtools/dataconverter/convert.py
@@ -237,7 +237,11 @@ def convert(
     )
 
     helpers.add_default_root_attributes(data=data, filename=os.path.basename(output))
-    Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write()
+
+    write_docs = kwargs.pop("write_docs", False)
+    Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write(
+        write_docs=write_docs
+    )
 
     logger.info(f"The output file generated: {output}.")
 
@@ -350,6 +354,12 @@ def main_cli():
     default=None,
     help="A json config file for the reader",
 )
+@click.option(
+    "--write-docs",
+    is_flag=True,
+    default=False,
+    help="Write docs for the individual NeXus concepts as HDF5 attributes.",
+)
 # pylint: disable=too-many-arguments
 def convert_cli(
     files: Tuple[str, ...],
@@ -390,6 +400,9 @@ def convert_cli(
     if config_file:
         kwargs["config_file"] = config_file
 
+    if write_docs:
+        kwargs["write_docs"] = write_docs
+
     file_list = []
     for file in files:
         if os.path.isdir(file):
diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index 467554173..e7cd667b6 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -21,7 +21,6 @@
 
 import copy
 import logging
-import sys
 import xml.etree.ElementTree as ET
 
 import h5py
@@ -32,6 +31,7 @@
 from pynxtools.definitions.dev_tools.utils.nxdl_utils import (
     NxdlAttributeNotFoundError,
     get_node_at_nxdl_path,
+    get_inherited_nodes,
 )
 
 logger = logging.getLogger("pynxtools")  # pylint: disable=C0103
@@ -109,7 +109,7 @@ def handle_shape_entries(data, file, path):
 
 
 # pylint: disable=too-many-locals, inconsistent-return-statements
-def handle_dicts_entries(data, grp, entry_name, output_path, path):
+def handle_dicts_entries(data, grp, entry_name, output_path, path, docs):
     """Handle function for dictionaries found as value of the nexus file.
 
     Several cases can be encoutered:
@@ -118,12 +118,14 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path):
     - Internal links
     - External links
     - compression label"""
+
     if "link" in data:
         file, path = split_link(data, output_path)
     # generate virtual datasets from slices
     if "shape" in data.keys():
         layout = handle_shape_entries(data, file, path)
-        grp.create_virtual_dataset(entry_name, layout)
+        dataset = grp.create_virtual_dataset(entry_name, layout)
+
     # multiple datasets to concatenate
     elif "link" in data.keys() and isinstance(data["link"], list):
         total_length = 0
@@ -141,7 +143,7 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path):
         for vsource in sources:
             layout[offset : offset + vsource.shape[0]] = vsource
             offset += vsource.shape[0]
-        grp.create_virtual_dataset(entry_name, layout, fillvalue=0)
+        dataset = grp.create_virtual_dataset(entry_name, layout, fillvalue=0)
     # internal and external links
     elif "link" in data.keys():
         if ":/" not in data["link"]:
@@ -159,7 +161,7 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path):
             )
             if accept is True:
                 strength = data["strength"]
-            grp.create_dataset(
+            dataset = grp.create_dataset(
                 entry_name,
                 data=data["compress"],
                 compression="gzip",
@@ -167,13 +169,20 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path):
                 compression_opts=strength,
             )
         else:
-            grp.create_dataset(entry_name, data=data["compress"])
+            dataset = grp.create_dataset(entry_name, data=data["compress"])
     else:
         raise InvalidDictProvided(
             "A dictionary was provided to the template but it didn't"
             " fall into any of the know cases of handling"
             " dictionaries. This occured for: " + entry_name
         )
+
+    if docs:
+        try:
+            dataset.attrs["docs"] = docs
+        except NameError:
+            pass
+
     # Check whether link has been stabilished or not
     try:
         return grp[entry_name]
@@ -198,10 +207,14 @@ class Writer:
         output_nexus (h5py.File): The h5py file object to manipulate output file.
         nxdl_data (dict): Stores xml data from given nxdl file to use during conversion.
         nxs_namespace (str): The namespace used in the NXDL tags. Helps search for XML children.
+        write_docs (bool): Write docs for the individual NeXus concepts as HDF5 attributes.
     """
 
     def __init__(
-        self, data: dict = None, nxdl_f_path: str = None, output_path: str = None
+        self,
+        data: dict = None,
+        nxdl_f_path: str = None,
+        output_path: str = None,
     ):
         """Constructs the necessary objects required by the Writer class."""
         self.data = data
@@ -211,6 +224,8 @@ def __init__(
         self.nxdl_data = ET.parse(self.nxdl_f_path).getroot()
         self.nxs_namespace = get_namespace(self.nxdl_data)
 
+        self.write_docs: bool = False
+
     def __nxdl_to_attrs(self, path: str = "/") -> dict:
         """
         Return a dictionary of all the attributes at the given path in the NXDL and
@@ -237,7 +252,7 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
 
         return elem.attrib
 
-    def get_nxdl_docs(self, path: str = "/", attr: bool = False) -> dict:
+    def __nxdl_docs(self, path: str = "/") -> dict:
         """Get the NXDL docs for a path in the data."""
 
         def extract_and_format_docs(elem: ET.Element) -> str:
@@ -247,123 +262,27 @@ def extract_and_format_docs(elem: ET.Element) -> str:
                 return docs[0].text.strip().replace("\\n", "\n")
             return ""
 
+        def get_nxdl_attr_doc(nxdl_path):
+            return ""
+
+        docs: str = ""
+
+        if not self.write_docs:
+            return docs
+
         nxdl_path = helpers.convert_data_converter_dict_to_nxdl_path(path)
 
-        try:
-            elem = get_node_at_nxdl_path(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
-        except NxdlAttributeNotFoundError:
-            return None
+        _, _, elist = get_inherited_nodes(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
 
-        if not attr:
-            return extract_and_format_docs(elem)
-        else:
-            from pynxtools.definitions.dev_tools.utils.nxdl_utils import (
-                get_inherited_nodes,
-            )
+        for elem in elist:
+            if not docs:
+                # Only use docs from superclasses if they are not extended.
+                docs += extract_and_format_docs(elem)
 
-            (class_path, nxdlpath, elist) = get_inherited_nodes(
-                nxdl_path, elem=copy.deepcopy(self.nxdl_data)
-            )
-            print(class_path, nxdlpath, elist)
-            if "data/@signal" in path:
-                print((class_path, nxdlpath, elist))
-                elem = get_node_at_nxdl_path(
-                    nxdl_path, elem=copy.deepcopy(self.nxdl_data)
-                )
-                print(path, nxdl_path, elem)
-
-            # return get_nxdl_attr_doc(elem)
-
-        # def append_docs(elem, doc: str):
-        #     """Append docs to existing docs."""
-        #     if isinstance(elem, (h5py.Group, h5py.Dataset)):
-        #         existing_doc = elem.attrs.get("doc", "")
-        #     # if isinstance(elem, (h5py.Attribute)):
-        #     #     existing_doc = str(elem)
-
-        #     if doc:
-        #         if isinstance(existing_doc, str):
-        #             return existing_doc + doc
-        #         else:
-        #             return doc
-
-    # def get_nxdl_attr_doc(  # pylint: disable=too-many-arguments,too-many-locals
-    #     elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info
-    # ):
-    #     """Get nxdl documentation for an attribute"""
-    #     new_elem = []
-    #     old_elem = elem
-    #     attr_inheritance_chain = []
-    #     for elem_index, act_elem1 in enumerate(elist):
-    #         act_elem = act_elem1
-    #         # NX_class is a compulsory attribute for groups in a nexus file
-    #         # which should match the type of the corresponding NXDL element
-    #         if (
-    #             attr == "NX_class"
-    #             and not isinstance(hdf_node, h5py.Dataset)
-    #             and elem_index == 0
-    #         ):
-    #             elem = None
-    #             logger, doc, attr = write_doc_string(logger, doc, attr)
-    #             new_elem = elem
-    #             break
-    #         # units category is a compulsory attribute for any fields
-    #         if attr == "units" and isinstance(hdf_node, h5py.Dataset):
-    #             req_str = "<<REQUIRED>>"
-    #             logger, act_elem, attr_inheritance_chain, doc, attr = try_find_units(
-    #                 logger, act_elem, attr_inheritance_chain, doc, attr
-    #             )
-    #         # units for attributes can be given as ATTRIBUTENAME_units
-    #         elif attr.endswith("_units"):
-    #             logger, act_elem, attr_inheritance_chain, doc, attr, req_str = (
-    #                 check_attr_name_nxdl(
-    #                     (logger, act_elem, attr_inheritance_chain, doc, attr, req_str)
-    #                 )
-    #             )
-    #         # default is allowed for groups
-    #         elif attr == "default" and not isinstance(hdf_node, h5py.Dataset):
-    #             req_str = "<<RECOMMENDED>>"
-    #             # try to find if default is defined as a child of the NXDL element
-    #             act_elem = get_nxdl_child(
-    #                 act_elem, attr, nexus_type="attribute", go_base=False
-    #             )
-    #             logger, act_elem, attr_inheritance_chain, doc, attr = try_find_default(
-    #                 logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr
-    #             )
-    #         else:  # other attributes
-    #             act_elem = get_nxdl_child(
-    #                 act_elem, attr, nexus_type="attribute", go_base=False
-    #             )
-    #             if act_elem is not None:
-    #                 logger, act_elem, attr_inheritance_chain, doc, attr = other_attrs(
-    #                     logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr
-    #                 )
-    #         if act_elem is not None:
-    #             new_elem.append(act_elem)
-    #             if req_str is None:
-    #                 req_str = get_required_string(act_elem)  # check for being required
-    #                 if doc:
-    #                     logger.debug(req_str)
-    #             variables = [logger, act_elem, path]
-    #             (
-    #                 logger,
-    #                 elem,
-    #                 path,
-    #                 doc,
-    #                 elist,
-    #                 attr,
-    #                 hdf_node,
-    #             ) = check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node)
-    #     elem = old_elem
-    #     if req_str is None and doc:
-    #         if attr != "NX_class":
-    #             logger.debug("@" + attr + " - IS NOT IN SCHEMA")
-    #         logger.debug("")
-
-    #     # Add the lowest child element to the nxdl_path
-    #     if attr_inheritance_chain:
-    #         nxdl_path.append(attr_inheritance_chain[0])
-    #     return (req_str, get_nxdl_entry(hdf_info), nxdl_path)
+        if not elist:
+            return get_nxdl_attr_doc(nxdl_path)
+
+        return docs
 
     def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Group:
         """Returns the parent if it exists for a given path else creates the parent group."""
@@ -378,9 +297,9 @@ def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Grou
             if attrs is not None:
                 grp.attrs["NX_class"] = attrs["type"]
 
-            docs = self.get_nxdl_docs(parent_path)
+            docs = self.__nxdl_docs(parent_path)
             if docs:
-                grp.attrs["doc"] = docs
+                grp.attrs["docs"] = docs
 
             return grp
         return self.output_nexus[parent_path_hdf5]
@@ -396,10 +315,7 @@ def add_units_key(dataset, path):
                 dataset.attrs["units"] = self.data[units_key]
 
         for path, value in self.data.items():
-            if path.split("/")[-1][0] == "@":
-                docs = self.get_nxdl_docs(path, attr=True)
-            else:
-                docs = self.get_nxdl_docs(path)
+            docs = self.__nxdl_docs(path)
 
             try:
                 if path[path.rindex("/") + 1 :] == "@units":
@@ -421,19 +337,17 @@ def add_units_key(dataset, path):
                     if isinstance(data, dict):
                         if "compress" in data.keys():
                             dataset = handle_dicts_entries(
-                                data, grp, entry_name, self.output_path, path
+                                data, grp, entry_name, self.output_path, path, docs
                             )
-                            if docs:
-                                dataset.attrs["doc"] = docs
 
                         else:
                             hdf5_links_for_later.append(
-                                [data, grp, entry_name, self.output_path, path]
+                                [data, grp, entry_name, self.output_path, path, docs]
                             )
                     else:
                         dataset = grp.create_dataset(entry_name, data=data)
                         if docs:
-                            dataset.attrs["doc"] = docs
+                            dataset.attrs["docs"] = docs
 
             except InvalidDictProvided as exc:
                 print(str(exc))
@@ -450,7 +364,7 @@ def add_units_key(dataset, path):
                 del self.data[links[-1]]
 
         for path, value in self.data.items():
-            docs = self.get_nxdl_docs(path)
+            docs = self.__nxdl_docs(path)
             try:
                 if path[path.rindex("/") + 1 :] == "@units":
                     continue
@@ -458,6 +372,7 @@ def add_units_key(dataset, path):
                 entry_name = helpers.get_name_from_data_dict_entry(
                     path[path.rindex("/") + 1 :]
                 )
+
                 if is_not_data_empty(value):
                     data = value
                 else:
@@ -468,21 +383,27 @@ def add_units_key(dataset, path):
 
                     add_units_key(self.output_nexus[path_hdf5], path)
                 else:
-                    # consider changing the name here the lvalue can also be group!
                     dataset = self.ensure_and_get_parent_node(
                         path, self.data.undocumented.keys()
                     )
                     dataset.attrs[entry_name[1:]] = data
                     if docs:
-                        dataset.attrs["doc"] = docs
+                        # Write docs for attributes like <attr>__docs
+                        dataset.attrs[f"{entry_name[1:]}__docs"] = docs
             except Exception as exc:
                 raise IOError(
                     f"Unknown error occured writing the path: {path} "
                     f"with the following message: {str(exc)}"
                 ) from exc
 
-    def write(self):
-        """Writes the NeXus file with previously validated data from the reader with NXDL attrs."""
+    def write(self, write_docs=False):
+        """
+        Writes the NeXus file with previously validated data from the reader with NXDL attrs.
+
+        Args:
+            write_docs (bool): Write docs for the individual NeXus concepts as HDF5 attributes. The default is False.
+        """
+        self.write_docs = write_docs
         try:
             self._put_data_into_hdf5()
         finally:
diff --git a/tests/dataconverter/test_writer.py b/tests/dataconverter/test_writer.py
index acc84d8d5..6f6f7c898 100644
--- a/tests/dataconverter/test_writer.py
+++ b/tests/dataconverter/test_writer.py
@@ -59,6 +59,26 @@ def test_write(writer):
     assert test_nxs["/my_entry/nxodd_name/posint_value"].shape == (3,)  # pylint: disable=no-member
 
 
+def test_write_docs(writer):
+    """Test for the Writer's write_docs option. Checks whether docs are written for NeXus concepts."""
+    writer.write(write_docs=True)
+    test_nxs = h5py.File(writer.output_path, "r")
+    # print(writer.output_path)
+    assert (
+        test_nxs["/my_entry"].attrs["docs"]
+        == "This is a dummy NXDL to test out the dataconverter."
+    )
+    # assert test_nxs["/my_entry/definition"].attrs["version__docs"] == "This is the version of the definition."
+    assert (
+        test_nxs["/my_entry/nxodd_name/int_value"].attrs["docs"]
+        == "A dummy entry for an int value."
+    )
+    assert (
+        test_nxs["/my_entry/required_group"].attrs["docs"]
+        == "This is a required yet empty group."
+    )
+
+
 def test_write_link(writer):
     """Test for the Writer's write function.
 

From a8132430d2d10d620f1ebf8eb920e711c712010d Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:02:54 +0200
Subject: [PATCH 03/10] mypy fixes

---
 src/pynxtools/dataconverter/convert.py |  1 +
 src/pynxtools/dataconverter/writer.py  | 10 ++--------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py
index 0ca50fc44..8cf6cd0a6 100644
--- a/src/pynxtools/dataconverter/convert.py
+++ b/src/pynxtools/dataconverter/convert.py
@@ -373,6 +373,7 @@ def convert_cli(
     mapping: str,
     config_file: str,
     fail: bool,
+    write_docs: bool,
     **kwargs,
 ):
     """This command allows you to use the converter functionality of the dataconverter."""
diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index e7cd667b6..d4b44f46d 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -118,14 +118,12 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path, docs):
     - Internal links
     - External links
     - compression label"""
-
     if "link" in data:
         file, path = split_link(data, output_path)
     # generate virtual datasets from slices
     if "shape" in data.keys():
         layout = handle_shape_entries(data, file, path)
         dataset = grp.create_virtual_dataset(entry_name, layout)
-
     # multiple datasets to concatenate
     elif "link" in data.keys() and isinstance(data["link"], list):
         total_length = 0
@@ -211,10 +209,7 @@ class Writer:
     """
 
     def __init__(
-        self,
-        data: dict = None,
-        nxdl_f_path: str = None,
-        output_path: str = None,
+        self, data: dict = None, nxdl_f_path: str = None, output_path: str = None
     ):
         """Constructs the necessary objects required by the Writer class."""
         self.data = data
@@ -252,7 +247,7 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
 
         return elem.attrib
 
-    def __nxdl_docs(self, path: str = "/") -> dict:
+    def __nxdl_docs(self, path: str = "/") -> str:
         """Get the NXDL docs for a path in the data."""
 
         def extract_and_format_docs(elem: ET.Element) -> str:
@@ -372,7 +367,6 @@ def add_units_key(dataset, path):
                 entry_name = helpers.get_name_from_data_dict_entry(
                     path[path.rindex("/") + 1 :]
                 )
-
                 if is_not_data_empty(value):
                     data = value
                 else:

From 19f6364f7015dd506002f77bcc066a1e86ac7570 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Fri, 30 Aug 2024 10:26:07 +0200
Subject: [PATCH 04/10] add special logic for appdef docs and attributes

---
 src/pynxtools/dataconverter/writer.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index d4b44f46d..f52157f64 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -257,16 +257,19 @@ def extract_and_format_docs(elem: ET.Element) -> str:
                 return docs[0].text.strip().replace("\\n", "\n")
             return ""
 
-        def get_nxdl_attr_doc(nxdl_path):
-            return ""
-
         docs: str = ""
 
         if not self.write_docs:
-            return docs
+            return
 
         nxdl_path = helpers.convert_data_converter_dict_to_nxdl_path(path)
 
+        if nxdl_path == "/ENTRY":
+            # Special case for docs of application definition
+            app_def_docs = extract_and_format_docs(self.nxdl_data)
+            if app_def_docs:
+                return app_def_docs
+
         _, _, elist = get_inherited_nodes(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
 
         for elem in elist:
@@ -275,7 +278,14 @@ def get_nxdl_attr_doc(nxdl_path):
                 docs += extract_and_format_docs(elem)
 
         if not elist:
-            return get_nxdl_attr_doc(nxdl_path)
+            # Handle docs for attributeS
+            (_, inherited_nodes, _) = get_inherited_nodes(
+                nxdl_path, elem=copy.deepcopy(self.nxdl_data)
+            )
+            attrs = inherited_nodes[-1].findall(f"{self.nxs_namespace}attribute")
+            for attr in attrs:
+                if attr.attrib["name"] == path.split("@")[-1]:
+                    docs += extract_and_format_docs(attr)
 
         return docs
 

From 1d24501e241d8f901b760e6a0bb3676ec6fe9663 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Fri, 30 Aug 2024 10:28:19 +0200
Subject: [PATCH 05/10] test for attribute docs

---
 tests/dataconverter/test_writer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/dataconverter/test_writer.py b/tests/dataconverter/test_writer.py
index 6f6f7c898..a6502c519 100644
--- a/tests/dataconverter/test_writer.py
+++ b/tests/dataconverter/test_writer.py
@@ -68,7 +68,10 @@ def test_write_docs(writer):
         test_nxs["/my_entry"].attrs["docs"]
         == "This is a dummy NXDL to test out the dataconverter."
     )
-    # assert test_nxs["/my_entry/definition"].attrs["version__docs"] == "This is the version of the definition."
+    assert (
+        test_nxs["/my_entry/definition"].attrs["version__docs"]
+        == "This is the version of the definition."
+    )
     assert (
         test_nxs["/my_entry/nxodd_name/int_value"].attrs["docs"]
         == "A dummy entry for an int value."

From b289e49cf8615944cdcde77ab7d44da864438934 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Fri, 30 Aug 2024 12:05:19 +0200
Subject: [PATCH 06/10] mypy fix

---
 src/pynxtools/dataconverter/writer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index f52157f64..45a499d62 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -22,6 +22,7 @@
 import copy
 import logging
 import xml.etree.ElementTree as ET
+from typing import Optional
 
 import h5py
 import numpy as np
@@ -247,7 +248,7 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
 
         return elem.attrib
 
-    def __nxdl_docs(self, path: str = "/") -> str:
+    def __nxdl_docs(self, path: str = "/") -> Optional[str]:
         """Get the NXDL docs for a path in the data."""
 
         def extract_and_format_docs(elem: ET.Element) -> str:
@@ -260,7 +261,7 @@ def extract_and_format_docs(elem: ET.Element) -> str:
         docs: str = ""
 
         if not self.write_docs:
-            return
+            return None
 
         nxdl_path = helpers.convert_data_converter_dict_to_nxdl_path(path)
 

From 81edf905c27cb8be7717b41e4f197ad1daa8c1db Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Fri, 30 Aug 2024 12:35:24 +0200
Subject: [PATCH 07/10] fix attribute docs in NXtest.nxdl.xml

---
 src/pynxtools/data/NXtest.nxdl.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml
index 01e39214d..04d9dd1b8 100644
--- a/src/pynxtools/data/NXtest.nxdl.xml
+++ b/src/pynxtools/data/NXtest.nxdl.xml
@@ -13,8 +13,9 @@
         <field name="program_name"/>
         <field name="definition">
             <doc>This is a dummy NXDL to test out the dataconverter.</doc>
-            <attribute name="version"/>
+            <attribute name="version">
                 <doc>This is the version of the definition.</doc>
+            </attribute>
             <enumeration>
                 <item value="NXTEST"/>
                 <item value="NXtest"/>

From 10c2987b0209bb4a43df0863591f77f9345f2d98 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:06:09 +0200
Subject: [PATCH 08/10] allow for different docstyles

---
 src/pynxtools/dataconverter/convert.py | 24 ++++++++++++++++--
 src/pynxtools/dataconverter/writer.py  | 34 ++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py
index 8cf6cd0a6..66aecff29 100644
--- a/src/pynxtools/dataconverter/convert.py
+++ b/src/pynxtools/dataconverter/convert.py
@@ -239,8 +239,10 @@ def convert(
     helpers.add_default_root_attributes(data=data, filename=os.path.basename(output))
 
     write_docs = kwargs.pop("write_docs", False)
+    docs_format = kwargs.pop("docs_format", None)
     Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write(
-        write_docs=write_docs
+        write_docs=write_docs,
+        docs_format=docs_format,
     )
 
     logger.info(f"The output file generated: {output}.")
@@ -360,7 +362,15 @@ def main_cli():
     default=False,
     help="Write docs for the individual NeXus concepts as HDF5 attributes.",
 )
-# pylint: disable=too-many-arguments
+@click.option(
+    "--docs-format",
+    type=click.Choice(["default", "html", "html5", "xml", "pseudoxml"]),
+    default=None,
+    help=(
+        "Optionally specify the format in which the docs for the individual NeXus concepts is generated. "
+        "By default, the docs are formatted as in the NXDL file."
+    ),
+)
 def convert_cli(
     files: Tuple[str, ...],
     input_file: Tuple[str, ...],
@@ -374,6 +384,7 @@ def convert_cli(
     config_file: str,
     fail: bool,
     write_docs: bool,
+    docs_format: str,
     **kwargs,
 ):
     """This command allows you to use the converter functionality of the dataconverter."""
@@ -403,6 +414,15 @@ def convert_cli(
 
     if write_docs:
         kwargs["write_docs"] = write_docs
+        if not docs_format:
+            kwargs["docs_format"] = "default"
+        else:
+            kwargs["docs_format"] = docs_format
+
+    elif docs_format is not None:
+        raise click.UsageError(
+            "Error: --docs-format can only be used with --write-docs."
+        )
 
     file_list = []
     for file in files:
diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index 45a499d62..1945e5ab6 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -26,6 +26,7 @@
 
 import h5py
 import numpy as np
+from docutils.core import publish_string
 
 from pynxtools.dataconverter import helpers
 from pynxtools.dataconverter.exceptions import InvalidDictProvided
@@ -210,7 +211,10 @@ class Writer:
     """
 
     def __init__(
-        self, data: dict = None, nxdl_f_path: str = None, output_path: str = None
+        self,
+        data: dict = None,
+        nxdl_f_path: str = None,
+        output_path: str = None,
     ):
         """Constructs the necessary objects required by the Writer class."""
         self.data = data
@@ -221,6 +225,7 @@ def __init__(
         self.nxs_namespace = get_namespace(self.nxdl_data)
 
         self.write_docs: bool = False
+        self.docs_format: str = "default"
 
     def __nxdl_to_attrs(self, path: str = "/") -> dict:
         """
@@ -251,11 +256,29 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
     def __nxdl_docs(self, path: str = "/") -> Optional[str]:
         """Get the NXDL docs for a path in the data."""
 
+        def rst_to_html(rst_text: str) -> str:
+            """
+            Convert reStructuredText to HTML using Docutils.
+
+            Args:
+                rst_text (str): The input RST text to be converted.
+
+            Returns:
+                str: The resulting HTML content.
+            """
+            return publish_string(rst_text, writer_name="html").decode("utf-8")
+
         def extract_and_format_docs(elem: ET.Element) -> str:
             """Get the docstring for a given element in the NDXL tree."""
-            docs = elem.findall(f"{self.nxs_namespace}doc")
-            if docs:
-                return docs[0].text.strip().replace("\\n", "\n")
+            docs_elements = elem.findall(f"{self.nxs_namespace}doc")
+            if docs_elements:
+                docs = docs_elements[0].text
+                if self.docs_format != "default":
+                    docs = publish_string(docs, writer_name=self.docs_format).decode(
+                        "utf-8"
+                    )
+                print(docs.strip().replace("\\n", "\n"))
+                return docs.strip().replace("\\n", "\n")
             return ""
 
         docs: str = ""
@@ -401,7 +424,7 @@ def add_units_key(dataset, path):
                     f"with the following message: {str(exc)}"
                 ) from exc
 
-    def write(self, write_docs=False):
+    def write(self, write_docs: bool = False, docs_format: str = "default"):
         """
         Writes the NeXus file with previously validated data from the reader with NXDL attrs.
 
@@ -409,6 +432,7 @@ def write(self, write_docs=False):
             write_docs (bool): Write docs for the individual NeXus concepts as HDF5 attributes. The default is False.
         """
         self.write_docs = write_docs
+        self.docs_format = docs_format
         try:
             self._put_data_into_hdf5()
         finally:

From 1245dbd5241cb36c66c49b0cca209e7cd219a474 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:22:13 +0200
Subject: [PATCH 09/10] allow for different docstyles

---
 src/pynxtools/dataconverter/writer.py | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index 1945e5ab6..26a653929 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -19,6 +19,7 @@
 
 # pylint: disable=R0912
 
+import io
 import copy
 import logging
 import xml.etree.ElementTree as ET
@@ -120,6 +121,8 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path, docs):
     - Internal links
     - External links
     - compression label"""
+
+    # print(data, grp, entry_name, output_path, path, docs)
     if "link" in data:
         file, path = split_link(data, output_path)
     # generate virtual datasets from slices
@@ -256,28 +259,17 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict:
     def __nxdl_docs(self, path: str = "/") -> Optional[str]:
         """Get the NXDL docs for a path in the data."""
 
-        def rst_to_html(rst_text: str) -> str:
-            """
-            Convert reStructuredText to HTML using Docutils.
-
-            Args:
-                rst_text (str): The input RST text to be converted.
-
-            Returns:
-                str: The resulting HTML content.
-            """
-            return publish_string(rst_text, writer_name="html").decode("utf-8")
-
         def extract_and_format_docs(elem: ET.Element) -> str:
             """Get the docstring for a given element in the NDXL tree."""
             docs_elements = elem.findall(f"{self.nxs_namespace}doc")
             if docs_elements:
                 docs = docs_elements[0].text
                 if self.docs_format != "default":
-                    docs = publish_string(docs, writer_name=self.docs_format).decode(
-                        "utf-8"
-                    )
-                print(docs.strip().replace("\\n", "\n"))
+                    docs = publish_string(
+                        docs,
+                        writer_name=self.docs_format,
+                        settings_overrides={"warning_stream": io.StringIO()},
+                    ).decode("utf-8")
                 return docs.strip().replace("\\n", "\n")
             return ""
 
@@ -417,7 +409,7 @@ def add_units_key(dataset, path):
                     dataset.attrs[entry_name[1:]] = data
                     if docs:
                         # Write docs for attributes like <attr>__docs
-                        dataset.attrs[f"{entry_name[1:]}__docs"] = docs
+                        dataset.attrs[f"{entry_name[1:]}_docs"] = docs
             except Exception as exc:
                 raise IOError(
                     f"Unknown error occured writing the path: {path} "

From 55ea46a8f2104006dfe3b7ff7a084cda4d9b8147 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:58:18 +0200
Subject: [PATCH 10/10] implement functionality for doc with concept path

---
 src/pynxtools/dataconverter/helpers.py | 58 ++++++++++++++++++++++++++
 src/pynxtools/dataconverter/writer.py  | 24 ++++++++++-
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py
index e9b35dffb..87c5691f0 100644
--- a/src/pynxtools/dataconverter/helpers.py
+++ b/src/pynxtools/dataconverter/helpers.py
@@ -790,6 +790,64 @@ def get_concept_basepath(path: str) -> str:
     return "/" + "/".join(concept_path)
 
 
+def get_concept_path_from_elem(elem: ET.Element) -> str:
+    """
+    Process individual XML element to generate the NeXus concept path.
+
+    Output is e.g. "NXexperiment:/NXentry/NXinstrument/NXdetector".
+    """
+
+    name = elem.attrib.get("name", "")
+    elem_type = elem.attrib.get("type", "")
+    nxdlbase = elem.attrib.get("nxdlbase", "")  # .split("/")[-1]
+    nxdlbase_class = elem.attrib.get("nxdlbase_class", "")
+    nxdlpath = elem.attrib.get("nxdlpath", "")
+    category = elem.attrib.get("category", "")
+    # optional = elem.attrib.get("optional", "")
+    # extends = elem.attrib.get("extends", "")
+
+    # print(f"tag: {tag}")
+    # print(f"name: {name}")
+    # print(f"elem_type: {elem_type}")
+    # print(f"nxdlbase: {nxdlbase}")
+    # print(f"nxdlbase_class: {nxdlbase_class}")
+    # print(f"nxdlpath: {nxdlpath}")
+    # # print(f"optional: {optional}")
+    # # print(f"extends: {extends}")
+    # print("\n")
+
+    concept_path = ""
+
+    if elem.tag.endswith("group"):
+        if nxdlbase_class and nxdlbase_class == "application":
+            concept_path += "NXmpes:"
+            concept_path += nxdlpath  # + = f"(elem_type)"
+
+        else:
+            if nxdlbase:
+                concept_path += nxdlbase.replace(".nxdl.xml", "").split(os.path.sep)[-1]
+            concept_path += nxdlpath  # + = f"(elem_type)"
+
+    elif elem.tag.endswith("field"):
+        pass
+
+    elif elem.tag.endswith("attribute"):
+        pass
+    elif elem.tag.endswith("definition"):
+        concept_path += name
+
+    return concept_path
+
+    # if nxdlpath:
+    #     # Split the nxdlpath and construct the string
+    #     path_parts = nxdlpath.strip("/").split("/")
+    #     formatted_path = "/".join(path_parts)
+    #     return f"{formatted_path}({elem_type})"
+    # else:
+    #     # For elements with no path, return the name and type
+    #     return f"{name}({elem_type})"
+
+
 def remove_namespace_from_tag(tag):
     """Helper function to remove the namespace from an XML tag."""
 
diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index 26a653929..83e0201e1 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -286,12 +286,34 @@ def extract_and_format_docs(elem: ET.Element) -> str:
             if app_def_docs:
                 return app_def_docs
 
-        _, _, elist = get_inherited_nodes(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
+        class_path, nxdl_elem_path, elist = get_inherited_nodes(
+            nxdl_path, elem=copy.deepcopy(self.nxdl_data)
+        )
 
+        path_to_check = "/ENTRY/INSTRUMENT/ELECTRONANALYSER/energy_resolution"  # /physical_quantity" # == "/ENTRY/SAMPLE/flood_gun_current_env/flood_gun"
+
+        if nxdl_path == path_to_check:
+            for thing in [
+                # path,
+                # nxdl_path,
+                # class_path,
+                # nxdl_elem_path,
+                # elist
+            ]:
+                print(thing, "\n")
         for elem in elist:
+            if nxdl_path == path_to_check:
+                # print(elem.tag)
+                # print("\t elem.attrib:", elem.attrib.keys())
+
+                if elem.tag.endswith(("group", "field", "attribute", "definition")):
+                    concept_path = helpers.get_concept_path_from_elem(elem), "\n"
+                #     print(concept_path)
+
             if not docs:
                 # Only use docs from superclasses if they are not extended.
                 docs += extract_and_format_docs(elem)
+        # print("\n")
 
         if not elist:
             # Handle docs for attributeS