Merge pull request #1491 from FAIRmat-NFDI/name-type-partial-in-dev-t…

…ools `name_type="partial"` in dev tools
nexusformat · Oct 17, 2024 · ee1078b · ee1078b
2 parents 979ec79 + bdf0ed8
commit ee1078b
Show file tree

Hide file tree

Showing 4 changed files with 371 additions and 89 deletions.
diff --git a/dev_tools/docs/anchor_list.py b/dev_tools/docs/anchor_list.py
@@ -114,6 +114,10 @@ def write(self):
             return
         contents = dict(
             _metadata=dict(
+                # datetime=datetime.datetime.now(datetime.UTC).isoformat(),
+                # The commented-out line above is the replacement for utcnow(), which
+                # is deprecated as of py3.12; it cannot be used yet because
+                # datetime.UTC does not exist in py3.9, so utcnow() is kept below.
                 datetime=datetime.datetime.utcnow().isoformat(),
                 title="NeXus NXDL vocabulary.",
                 subtitle="Anchors for all NeXus fields, groups, "

diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py
@@ -311,13 +311,38 @@ def _get_doc_blocks(ns, node):
             out_blocks.append("\n".join(out_lines))
         return out_blocks
 
+    def _handle_multiline_docstring(self, blocks):
+        """
+        Collapse several doc paragraphs into a single line with inline links.
+
+        Every paragraph in ``blocks`` is split into lines and empty lines are
+        dropped. A line containing a hyperlink target (``.. _name: target``)
+        is collected as a link; every other line is stripped and appended to
+        the result, each preceded by one space (so a non-empty result starts
+        with a space). Finally each ```name`_`` reference in the text is
+        rewritten as ```name <target>`_``.
+
+        :param blocks: list of paragraph strings
+        :return: the merged single-line string
+        """
+        target_re = re.compile(r"\.\. _([^:]+):(.*)")
+
+        link_targets = []
+        text_parts = []
+        for paragraph in blocks:
+            for line in paragraph.split("\n"):
+                if line == "":
+                    continue
+                found = target_re.search(line)
+                if found is None:
+                    # lines come from split("\n"), so no newline is left in them
+                    text_parts.append(" " + line.strip())
+                else:
+                    link_targets.append((found.group(1), found.group(2).strip()))
+
+        merged = "".join(text_parts)
+        for label, url in link_targets:
+            merged = merged.replace(f"`{label}`_", f"`{label} <{url}>`_")
+        return merged
+
+
     def _get_doc_line(self, ns, node):
         blocks = self._get_doc_blocks(ns, node)
         if len(blocks) == 0:
             return ""
         if len(blocks) > 1:
-            raise Exception(f"Unexpected multi-paragraph doc [{'|'.join(blocks)}]")
-        return re.sub(r"\n", " ", blocks[0])
+            return self._handle_multiline_docstring(blocks)
+        return blocks[0].replace("\n", " ")
 
     def _get_minOccurs(self, node):
         """

diff --git a/dev_tools/tests/test_nxdl_utils.py b/dev_tools/tests/test_nxdl_utils.py
@@ -2,9 +2,10 @@
 
 """
 
-import os
+from pathlib import Path
 
 import lxml.etree as ET
+import pytest
 
 from ..utils import nxdl_utils as nexus
 
@@ -32,8 +33,8 @@ def test_get_nexus_classes_units_attributes():
 
 def test_get_node_at_nxdl_path():
     """Test to verify if we receive the right XML element for a given NXDL path"""
-    local_dir = os.path.abspath(os.path.dirname(__file__))
-    nxdl_file_path = os.path.join(local_dir, "./NXtest.nxdl.xml")
+    local_dir = Path(__file__).resolve().parent
+    nxdl_file_path = local_dir / "NXtest.nxdl.xml"
     elem = ET.parse(nxdl_file_path).getroot()
     node = nexus.get_node_at_nxdl_path("/ENTRY/NXODD_name", elem=elem)
     assert node.attrib["type"] == "NXdata"
@@ -48,11 +49,144 @@ def test_get_node_at_nxdl_path():
     )
     assert node.attrib["name"] == "long_name"
 
+    nxdl_file_path = (
+        local_dir.parent.parent / "contributed_definitions" / "NXiv_temp.nxdl.xml"
+    )
+    elem = ET.parse(nxdl_file_path).getroot()
+    node = nexus.get_node_at_nxdl_path(
+        "/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem
+    )
+    assert node.attrib["name"] == "voltage_controller"
+
+    node = nexus.get_node_at_nxdl_path(
+        "/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller/calibration_time", elem=elem
+    )
+    assert node.attrib["name"] == "calibration_time"
+
 
 def test_get_inherited_nodes():
     """Test to verify if we receive the right XML element list for a given NXDL path."""
-    local_dir = os.path.abspath(os.path.dirname(__file__))
-    nxdl_file_path = os.path.join(local_dir, "./NXtest.nxdl.xml")
+    local_dir = Path(__file__).resolve().parent
+    nxdl_file_path = local_dir / "NXtest.nxdl.xml"
+
     elem = ET.parse(nxdl_file_path).getroot()
     (_, _, elist) = nexus.get_inherited_nodes(nxdl_path="/ENTRY/NXODD_name", elem=elem)
     assert len(elist) == 3
+
+    nxdl_file_path = (
+        local_dir.parent.parent / "contributed_definitions" / "NXiv_temp.nxdl.xml"
+    )
+
+    elem = ET.parse(nxdl_file_path).getroot()
+    (_, _, elist) = nexus.get_inherited_nodes(
+        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT", elem=elem
+    )
+    assert len(elist) == 3
+
+    (_, _, elist) = nexus.get_inherited_nodes(
+        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem
+    )
+    assert len(elist) == 4
+
+    (_, _, elist) = nexus.get_inherited_nodes(
+        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller",
+        nx_name="NXiv_temp",
+    )
+    assert len(elist) == 4
+
+
+@pytest.mark.parametrize(
+    "hdf_name,concept_name,should_fit",
+    [
+        ("source_pump", "sourceType", False),
+        ("source_pump", "sourceTYPE", True),
+        ("source pump", "sourceTYPE", False),
+        ("source", "sourceTYPE", False),
+        ("source123", "SOURCE", True),
+        ("1source", "SOURCE", True),
+        ("_source", "SOURCE", True),
+        ("same_name", "same_name", True),
+        ("angular_energy_resolution", "angularNresolution", True),
+        ("angularresolution", "angularNresolution", False),
+        ("Name with some whitespaces in it", "ENTRY", False),
+        ("simple_name", "TEST", True),
+        (".test", "TEST", False),
+    ],
+)
+def test_namefitting(hdf_name, concept_name, should_fit):
+    """
+    Check whether ``hdf_name`` is accepted as a fit for ``concept_name``.
+
+    With ``name_partial=True`` a fitting name must score above -1 and a
+    non-fitting name must score exactly -1.
+    """
+    fit_score = nexus.get_nx_namefit(hdf_name, concept_name, name_partial=True)
+    if not should_fit:
+        assert fit_score == -1
+        return
+    assert fit_score > -1
+
+
+@pytest.mark.parametrize(
+    "hdf_name,concept_name, score",
+    [
+        ("test_name", "TEST_name", 9),
+        ("te_name", "TEST_name", 7),
+        ("my_other_name", "TEST_name", 5),
+        ("test_name", "test_name", 18),
+        ("test_other", "test_name", -1),
+        ("my_fancy_yet_long_name", "my_SOME_name", 8),
+        ("something", "XXXX", 0),
+        ("something", "OTHER", 1),
+    ],
+)
+def test_namefitting_scores(hdf_name, concept_name, score):
+    """Check the exact score returned for each (hdf_name, concept_name) pair."""
+    actual = nexus.get_nx_namefit(hdf_name, concept_name, name_partial=True)
+    assert actual == score
+
+
+@pytest.mark.parametrize(
+    "better_fit,better_ref,worse_fit,worse_ref",
+    [
+        ("sourcetype", "sourceTYPE", "source_pump", "sourceTYPE"),
+        ("source_pump", "sourceTYPE", "source_pump", "TEST"),
+    ],
+)
+def test_namefitting_precedence(better_fit, better_ref, worse_fit, worse_ref):
+    """
+    Check that the better (name, concept) pair scores strictly higher.
+
+    Both scores are computed with ``name_partial=True`` so that they are
+    obtained under the same matching mode and are directly comparable.
+    """
+    better_score = nexus.get_nx_namefit(better_fit, better_ref, name_partial=True)
+    worse_score = nexus.get_nx_namefit(worse_fit, worse_ref, name_partial=True)
+
+    assert better_score > worse_score
+
+
+@pytest.mark.parametrize(
+    "string_obj, decode, expected",
+    [
+        # Test with lists of bytes and strings
+        ([b"bytes", "string"], True, ["bytes", "string"]),
+        ([b"bytes", "string"], False, [b"bytes", "string"]),
+        ([b"bytes", b"more_bytes", "string"], True, ["bytes", "more_bytes", "string"]),
+        (
+            [b"bytes", b"more_bytes", "string"],
+            False,
+            [b"bytes", b"more_bytes", "string"],
+        ),
+        ([b"fixed", b"length", b"strings"], True, ["fixed", "length", "strings"]),
+        ([b"fixed", b"length", b"strings"], False, [b"fixed", b"length", b"strings"]),
+        # Test with nested lists
+        ([[b"nested1"], [b"nested2"]], True, [["nested1"], ["nested2"]]),
+        ([[b"nested1"], [b"nested2"]], False, [[b"nested1"], [b"nested2"]]),
+        # Test with bytes
+        (b"single", True, "single"),
+        (b"single", False, b"single"),
+        # Test with str
+        ("single", True, "single"),
+        ("single", False, "single"),
+        # Test with int
+        (123, True, 123),
+        (123, False, 123),
+    ],
+)
+def test_decode_or_not(string_obj, decode, expected):
+    """
+    Check the value returned by ``decode_or_not`` for each input and flag.
+
+    The result must equal ``expected`` in every case; for list inputs the
+    container type is additionally checked first.
+    """
+    result = nexus.decode_or_not(elem=string_obj, decode=decode)
+    if isinstance(expected, list):
+        # Checked separately so a wrong container type gets its own message
+        assert isinstance(result, list), f"Expected list, but got {type(result)}"
+    # Compare the content in every case, list inputs included: checking only
+    # the container type would let wrongly decoded list elements go unnoticed
+    assert result == expected, f"Failed for {string_obj} with decode={decode}"