Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NX docstring as attribute #415

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
24 changes: 22 additions & 2 deletions src/pynxtools/dataconverter/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,10 @@ def convert(
helpers.add_default_root_attributes(data=data, filename=os.path.basename(output))

write_docs = kwargs.pop("write_docs", False)
docs_format = kwargs.pop("docs_format", None)
Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write(
write_docs=write_docs
write_docs=write_docs,
docs_format=docs_format,
)

logger.info(f"The output file generated: {output}.")
Expand Down Expand Up @@ -360,7 +362,15 @@ def main_cli():
default=False,
help="Write docs for the individual NeXus concepts as HDF5 attributes.",
)
# pylint: disable=too-many-arguments
@click.option(
"--docs-format",
type=click.Choice(["default", "html", "html5", "xml", "pseudoxml"]),
default=None,
help=(
"Optionally specify the format in which the docs for the individual NeXus concepts is generated. "
"By default, the docs are formatted as in the NXDL file."
),
)
def convert_cli(
files: Tuple[str, ...],
input_file: Tuple[str, ...],
Expand All @@ -374,6 +384,7 @@ def convert_cli(
config_file: str,
fail: bool,
write_docs: bool,
docs_format: str,
**kwargs,
):
"""This command allows you to use the converter functionality of the dataconverter."""
Expand Down Expand Up @@ -403,6 +414,15 @@ def convert_cli(

if write_docs:
kwargs["write_docs"] = write_docs
if not docs_format:
kwargs["docs_format"] = "default"
else:
kwargs["docs_format"] = docs_format

elif docs_format is not None:
raise click.UsageError(
"Error: --docs-format can only be used with --write-docs."
)

file_list = []
for file in files:
Expand Down
58 changes: 58 additions & 0 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,64 @@ def get_concept_basepath(path: str) -> str:
return "/" + "/".join(concept_path)


def get_concept_path_from_elem(elem: ET.Element) -> str:
"""
Process individual XML element to generate the NeXus concept path.

Output is e.g. "NXexperiment:/NXentry/NXinstrument/NXdetector".
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is called a classpath, but it can be ambiguous if two concepts of the same type exist next to one another. This is why the NeXus vocabulary follows the path we call the HTML name, which is unique for a given concept, e.g. NXexperiment:/ENTRY/INSTRUMENT/mydetector. Note that this does not hold the information about the NeXus classes referenced along the path, so if that is needed, a classpath needs to be returned, too.

"""

name = elem.attrib.get("name", "")
elem_type = elem.attrib.get("type", "")
nxdlbase = elem.attrib.get("nxdlbase", "") # .split("/")[-1]
nxdlbase_class = elem.attrib.get("nxdlbase_class", "")
nxdlpath = elem.attrib.get("nxdlpath", "")
category = elem.attrib.get("category", "")
# optional = elem.attrib.get("optional", "")
# extends = elem.attrib.get("extends", "")

# print(f"tag: {tag}")
# print(f"name: {name}")
# print(f"elem_type: {elem_type}")
# print(f"nxdlbase: {nxdlbase}")
# print(f"nxdlbase_class: {nxdlbase_class}")
# print(f"nxdlpath: {nxdlpath}")
# # print(f"optional: {optional}")
# # print(f"extends: {extends}")
# print("\n")

concept_path = ""

if elem.tag.endswith("group"):
if nxdlbase_class and nxdlbase_class == "application":
concept_path += "NXmpes:"
concept_path += nxdlpath # + = f"(elem_type)"

else:
if nxdlbase:
concept_path += nxdlbase.replace(".nxdl.xml", "").split(os.path.sep)[-1]
concept_path += nxdlpath # + = f"(elem_type)"

elif elem.tag.endswith("field"):
pass

elif elem.tag.endswith("attribute"):
pass
elif elem.tag.endswith("definition"):
concept_path += name

return concept_path

# if nxdlpath:
# # Split the nxdlpath and construct the string
# path_parts = nxdlpath.strip("/").split("/")
# formatted_path = "/".join(path_parts)
# return f"{formatted_path}({elem_type})"
# else:
# # For elements with no path, return the name and type
# return f"{name}({elem_type})"


def remove_namespace_from_tag(tag):
"""Helper function to remove the namespace from an XML tag."""

Expand Down
52 changes: 45 additions & 7 deletions src/pynxtools/dataconverter/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@

# pylint: disable=R0912

import io
import copy
import logging
import xml.etree.ElementTree as ET
from typing import Optional

import h5py
import numpy as np
from docutils.core import publish_string

from pynxtools.dataconverter import helpers
from pynxtools.dataconverter.exceptions import InvalidDictProvided
Expand Down Expand Up @@ -119,6 +121,8 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path, docs):
- Internal links
- External links
- compression label"""

# print(data, grp, entry_name, output_path, path, docs)
if "link" in data:
file, path = split_link(data, output_path)
# generate virtual datasets from slices
Expand Down Expand Up @@ -210,7 +214,10 @@ class Writer:
"""

def __init__(
self, data: dict = None, nxdl_f_path: str = None, output_path: str = None
self,
data: dict = None,
nxdl_f_path: str = None,
output_path: str = None,
):
"""Constructs the necessary objects required by the Writer class."""
self.data = data
Expand All @@ -221,6 +228,7 @@ def __init__(
self.nxs_namespace = get_namespace(self.nxdl_data)

self.write_docs: bool = False
self.docs_format: str = "default"

def __nxdl_to_attrs(self, path: str = "/") -> dict:
"""
Expand Down Expand Up @@ -253,9 +261,16 @@ def __nxdl_docs(self, path: str = "/") -> Optional[str]:

def extract_and_format_docs(elem: ET.Element) -> str:
"""Get the docstring for a given element in the NDXL tree."""
docs = elem.findall(f"{self.nxs_namespace}doc")
if docs:
return docs[0].text.strip().replace("\\n", "\n")
docs_elements = elem.findall(f"{self.nxs_namespace}doc")
if docs_elements:
docs = docs_elements[0].text
if self.docs_format != "default":
docs = publish_string(
docs,
writer_name=self.docs_format,
settings_overrides={"warning_stream": io.StringIO()},
).decode("utf-8")
return docs.strip().replace("\\n", "\n")
return ""

docs: str = ""
Expand All @@ -271,12 +286,34 @@ def extract_and_format_docs(elem: ET.Element) -> str:
if app_def_docs:
return app_def_docs

_, _, elist = get_inherited_nodes(nxdl_path, elem=copy.deepcopy(self.nxdl_data))
class_path, nxdl_elem_path, elist = get_inherited_nodes(
nxdl_path, elem=copy.deepcopy(self.nxdl_data)
)

path_to_check = "/ENTRY/INSTRUMENT/ELECTRONANALYSER/energy_resolution" # /physical_quantity" # == "/ENTRY/SAMPLE/flood_gun_current_env/flood_gun"

if nxdl_path == path_to_check:
for thing in [
# path,
# nxdl_path,
# class_path,
# nxdl_elem_path,
# elist
]:
print(thing, "\n")
for elem in elist:
if nxdl_path == path_to_check:
# print(elem.tag)
# print("\t elem.attrib:", elem.attrib.keys())

if elem.tag.endswith(("group", "field", "attribute", "definition")):
concept_path = helpers.get_concept_path_from_elem(elem), "\n"
# print(concept_path)

if not docs:
# Only use docs from superclasses if they are not extended.
docs += extract_and_format_docs(elem)
# print("\n")

if not elist:
# Handle docs for attributeS
Expand Down Expand Up @@ -394,21 +431,22 @@ def add_units_key(dataset, path):
dataset.attrs[entry_name[1:]] = data
if docs:
# Write docs for attributes like <attr>__docs
dataset.attrs[f"{entry_name[1:]}__docs"] = docs
dataset.attrs[f"{entry_name[1:]}_docs"] = docs
except Exception as exc:
raise IOError(
f"Unknown error occured writing the path: {path} "
f"with the following message: {str(exc)}"
) from exc

def write(self, write_docs=False):
def write(self, write_docs: bool = False, docs_format: str = "default"):
"""
Writes the NeXus file with previously validated data from the reader with NXDL attrs.

Args:
write_docs (bool): Write docs for the individual NeXus concepts as HDF5 attributes. The default is False.
"""
self.write_docs = write_docs
self.docs_format = docs_format
try:
self._put_data_into_hdf5()
finally:
Expand Down
Loading