Skip to content

Commit

Permalink
Merge pull request #1491 from FAIRmat-NFDI/name-type-partial-in-dev-t…
Browse files Browse the repository at this point in the history
…ools

`name_type="partial"`  in dev tools
  • Loading branch information
lukaspie authored Oct 17, 2024
2 parents 979ec79 + bdf0ed8 commit ee1078b
Show file tree
Hide file tree
Showing 4 changed files with 371 additions and 89 deletions.
4 changes: 4 additions & 0 deletions dev_tools/docs/anchor_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def write(self):
return
contents = dict(
_metadata=dict(
# datetime=datetime.datetime.now(datetime.UTC).isoformat(),
# the next line is the py3.9 supported way of getting the datetime
# this will become deprecated however in py3.12 for which the
# line above-mentioned is a fix, which however does not work in py3.9
datetime=datetime.datetime.utcnow().isoformat(),
title="NeXus NXDL vocabulary.",
subtitle="Anchors for all NeXus fields, groups, "
Expand Down
29 changes: 27 additions & 2 deletions dev_tools/docs/nxdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,38 @@ def _get_doc_blocks(ns, node):
out_blocks.append("\n".join(out_lines))
return out_blocks

def _handle_multiline_docstring(self, blocks):
    """
    Collapse a multi-paragraph doc into one line with inlined link targets.

    Every block is split into lines. A line containing a reStructuredText
    hyperlink target of the form ``.. _name: target`` is recorded as a
    (name, target) pair and left out of the text. All remaining non-blank
    lines are stripped and joined with single spaces. Finally, each named
    reference to a recorded target is rewritten into the embedded form
    ``name <target>``, so the returned line carries its own link targets.

    :param blocks: list of doc paragraphs (strings, possibly multi-line)
    :returns: the single-line docstring; "" when there is no text at all
    """
    link_pattern = re.compile(r"\.\. _([^:]+):(.*)")

    links = []
    text_parts = []

    for block in blocks:
        for line in block.split("\n"):
            line = line.strip()
            if not line:
                # Stripping first also drops whitespace-only lines, which
                # would otherwise leave runs of spaces in the result.
                continue

            link_match = link_pattern.search(line)
            if link_match is not None:
                links.append((link_match.group(1), link_match.group(2).strip()))
            else:
                text_parts.append(line)

    # join() yields no leading separator, matching the single-paragraph
    # path of _get_doc_line, and avoids repeated string concatenation.
    docstring = " ".join(text_parts)

    for name, target in links:
        docstring = docstring.replace(f"`{name}`_", f"`{name} <{target}>`_")

    return docstring

def _get_doc_line(self, ns, node):
    """
    Return the documentation of ``node`` as a single line of text.

    The paragraphs come from ``self._get_doc_blocks(ns, node)``:

    * no paragraph: an empty string is returned;
    * one paragraph: its newlines are replaced by spaces;
    * several paragraphs: they are merged by
      ``self._handle_multiline_docstring``.

    :param ns: passed through unchanged to ``_get_doc_blocks``
    :param node: passed through unchanged to ``_get_doc_blocks``
    :returns: the one-line documentation string
    """
    blocks = self._get_doc_blocks(ns, node)
    if not blocks:
        return ""
    if len(blocks) > 1:
        # Multi-paragraph docs are merged instead of raising an exception.
        return self._handle_multiline_docstring(blocks)
    return blocks[0].replace("\n", " ")

def _get_minOccurs(self, node):
"""
Expand Down
144 changes: 139 additions & 5 deletions dev_tools/tests/test_nxdl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
"""

import os
from pathlib import Path

import lxml.etree as ET
import pytest

from ..utils import nxdl_utils as nexus

Expand Down Expand Up @@ -32,8 +33,8 @@ def test_get_nexus_classes_units_attributes():

def test_get_node_at_nxdl_path():
"""Test to verify if we receive the right XML element for a given NXDL path"""
local_dir = os.path.abspath(os.path.dirname(__file__))
nxdl_file_path = os.path.join(local_dir, "./NXtest.nxdl.xml")
local_dir = Path(__file__).resolve().parent
nxdl_file_path = local_dir / "NXtest.nxdl.xml"
elem = ET.parse(nxdl_file_path).getroot()
node = nexus.get_node_at_nxdl_path("/ENTRY/NXODD_name", elem=elem)
assert node.attrib["type"] == "NXdata"
Expand All @@ -48,11 +49,144 @@ def test_get_node_at_nxdl_path():
)
assert node.attrib["name"] == "long_name"

nxdl_file_path = (
local_dir.parent.parent / "contributed_definitions" / "NXiv_temp.nxdl.xml"
)
elem = ET.parse(nxdl_file_path).getroot()
node = nexus.get_node_at_nxdl_path(
"/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem
)
assert node.attrib["name"] == "voltage_controller"

node = nexus.get_node_at_nxdl_path(
"/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller/calibration_time", elem=elem
)
assert node.attrib["name"] == "calibration_time"


def test_get_inherited_nodes():
    """Test to verify if we receive the right XML element list for a given NXDL path."""
    # Resolve fixture locations relative to this test module.
    local_dir = Path(__file__).resolve().parent
    nxdl_file_path = local_dir / "NXtest.nxdl.xml"

    elem = ET.parse(nxdl_file_path).getroot()
    (_, _, elist) = nexus.get_inherited_nodes(nxdl_path="/ENTRY/NXODD_name", elem=elem)
    assert len(elist) == 3

    # Second fixture: a definition file two directory levels above this module.
    nxdl_file_path = (
        local_dir.parent.parent / "contributed_definitions" / "NXiv_temp.nxdl.xml"
    )

    elem = ET.parse(nxdl_file_path).getroot()
    (_, _, elist) = nexus.get_inherited_nodes(
        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT", elem=elem
    )
    assert len(elist) == 3

    (_, _, elist) = nexus.get_inherited_nodes(
        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem
    )
    assert len(elist) == 4

    # Same path again, this time selected through nx_name instead of elem.
    (_, _, elist) = nexus.get_inherited_nodes(
        nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller",
        nx_name="NXiv_temp",
    )
    assert len(elist) == 4


@pytest.mark.parametrize(
    "hdf_name,concept_name,should_fit",
    [
        ("source_pump", "sourceType", False),
        ("source_pump", "sourceTYPE", True),
        ("source pump", "sourceTYPE", False),
        ("source", "sourceTYPE", False),
        ("source123", "SOURCE", True),
        ("1source", "SOURCE", True),
        ("_source", "SOURCE", True),
        ("same_name", "same_name", True),
        ("angular_energy_resolution", "angularNresolution", True),
        ("angularresolution", "angularNresolution", False),
        ("Name with some whitespaces in it", "ENTRY", False),
        ("simple_name", "TEST", True),
        (".test", "TEST", False),
    ],
)
def test_namefitting(hdf_name, concept_name, should_fit):
    """
    Check whether ``hdf_name`` is accepted as a fit for ``concept_name``.

    With ``name_partial=True`` the score must be greater than -1 when
    ``should_fit`` is set, and exactly -1 otherwise.
    """
    fit_score = nexus.get_nx_namefit(hdf_name, concept_name, name_partial=True)
    assert (fit_score > -1) if should_fit else (fit_score == -1)


@pytest.mark.parametrize(
    "hdf_name,concept_name, score",
    [
        ("test_name", "TEST_name", 9),
        ("te_name", "TEST_name", 7),
        ("my_other_name", "TEST_name", 5),
        ("test_name", "test_name", 18),
        ("test_other", "test_name", -1),
        ("my_fancy_yet_long_name", "my_SOME_name", 8),
        ("something", "XXXX", 0),
        ("something", "OTHER", 1),
    ],
)
def test_namefitting_scores(hdf_name, concept_name, score):
    """Check the exact score returned for each name pair with ``name_partial=True``."""
    actual_score = nexus.get_nx_namefit(hdf_name, concept_name, name_partial=True)
    assert actual_score == score


@pytest.mark.parametrize(
    "better_fit,better_ref,worse_fit,worse_ref",
    [
        ("sourcetype", "sourceTYPE", "source_pump", "sourceTYPE"),
        ("source_pump", "sourceTYPE", "source_pump", "TEST"),
    ],
)
def test_namefitting_precedence(better_fit, better_ref, worse_fit, worse_ref):
    """Check that the better name pair scores strictly higher than the worse one."""
    better_score = nexus.get_nx_namefit(better_fit, better_ref, name_partial=True)
    # NOTE(review): the comparison score is computed without name_partial,
    # unlike the call above - confirm that this asymmetry is intended.
    worse_score = nexus.get_nx_namefit(worse_fit, worse_ref)

    assert better_score > worse_score


@pytest.mark.parametrize(
    "string_obj, decode, expected",
    [
        # Test with lists of bytes and strings
        ([b"bytes", "string"], True, ["bytes", "string"]),
        ([b"bytes", "string"], False, [b"bytes", "string"]),
        ([b"bytes", b"more_bytes", "string"], True, ["bytes", "more_bytes", "string"]),
        (
            [b"bytes", b"more_bytes", "string"],
            False,
            [b"bytes", b"more_bytes", "string"],
        ),
        ([b"fixed", b"length", b"strings"], True, ["fixed", "length", "strings"]),
        ([b"fixed", b"length", b"strings"], False, [b"fixed", b"length", b"strings"]),
        # Test with nested lists
        ([[b"nested1"], [b"nested2"]], True, [["nested1"], ["nested2"]]),
        ([[b"nested1"], [b"nested2"]], False, [[b"nested1"], [b"nested2"]]),
        # Test with bytes
        (b"single", True, "single"),
        (b"single", False, b"single"),
        # Test with str
        ("single", True, "single"),
        ("single", False, "single"),
        # Test with int
        (123, True, 123),
        (123, False, 123),
    ],
)
def test_decode_or_not(string_obj, decode, expected):
    """
    Check the value returned by ``decode_or_not`` for each parametrized input.

    For list inputs the container type is asserted first so that a wrong
    type is reported with a dedicated message; the contents are then
    compared like every other case.
    """
    result = nexus.decode_or_not(elem=string_obj, decode=decode)
    if isinstance(expected, list):
        assert isinstance(result, list), f"Expected list, but got {type(result)}"
    # Compare the value in every case: bytes and str never compare equal, so
    # this also verifies that list elements were (or were not) decoded.
    assert result == expected, f"Failed for {string_obj} with decode={decode}"
Loading

0 comments on commit ee1078b

Please sign in to comment.