Skip to content

Commit

Permalink
make link removal default, implement ! notation for links
Browse files Browse the repository at this point in the history
  • Loading branch information
lukaspie committed Dec 4, 2024
1 parent 773e541 commit f7e0853
Showing 1 changed file with 80 additions and 8 deletions.
88 changes: 80 additions & 8 deletions src/pynxtools/dataconverter/readers/multi/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def resolve_special_keys(
key: str,
value: Any,
optional_groups_to_remove: List[str],
optional_groups_to_remove_from_links: List[str],
callbacks: ParseJsonCallbacks,
suppress_warning: bool = False,
) -> None:
Expand Down Expand Up @@ -188,6 +189,42 @@ def parse_config_value(value: str) -> Tuple[str, Any]:
return ("", value)
return prefixes[0]

def extract_inner_path(key):
    """
    Convert a Nexus concept path into a normalized HDF5 target path.

    Bracketed sections contribute their inner (instance) name. Segments
    outside of brackets are kept only if they are not fully upper-case,
    so concept names such as ``ENTRY`` or ``DATA`` are dropped while
    plain names such as ``attribute`` survive.

    Args:
        key (str): The Nexus concept path, containing bracketed
            sections for variable parts.

    Returns:
        str: The normalized path, with segments joined by "/" and no
        leading slash.

    Example:
        For a Nexus path like
        'ENTRY[entry_name]/GROUP[group]/DATA[data]/attribute',
        the function will return a normalized HDF5 path like
        'entry_name/group/data/attribute'.
    """
    # Remove only an exact leading "ENTRY". str.lstrip("ENTRY") would treat
    # its argument as a character set and eat any leading run of the letters
    # E, N, T, R, Y (e.g. the "T" of "Temperature/value").
    if key.startswith("ENTRY"):
        key = key[len("ENTRY"):]

    # Each match is a (bracketed, plain) tuple with exactly one side filled:
    # either the content of a [...] section or a bare path segment.
    parts = re.findall(r"\[([^\]]+)\]|([^/\[]+)", key)

    segments = []
    for bracketed, plain in parts:
        if bracketed:
            segments.append(bracketed)
        elif plain and not plain.isupper():
            # Fully upper-case bare segments are concept names; skip them.
            segments.append(plain)

    return "/".join(segments)

# Handle non-keyword values
if not isinstance(value, str) or "@" not in str(value):
new_entry_dict[key] = value
Expand All @@ -213,17 +250,28 @@ def parse_config_value(value: str) -> Tuple[str, Any]:
break

if isinstance(new_entry_dict[key], dict) and "link" in new_entry_dict[key]:
keys_as_hdf5_paths = {extract_inner_path(key) for key in new_entry_dict.keys()}

link_target = new_entry_dict[key]["link"]
if (
link_target.startswith("!")
and link_target.lstrip("!") not in new_entry_dict
):

if link_target.lstrip("/") not in keys_as_hdf5_paths:
if value.startswith("!"):
group_to_delete = key.rsplit("/", 1)[0]
if not suppress_warning:
logger.info(
f"Main element {key} not provided (broken link at {link_target}). "
f"Removing the parent group {group_to_delete}."
)
optional_groups_to_remove_from_links.append(group_to_delete)

if not suppress_warning:
logger.info(
f"There was no target at {link_target.lstrip('!')} for the optional link defined for {key}. "
f"There was no target at {link_target} "
f"for the optional link defined for {key}. "
f"Removing the link."
)
del new_entry_dict[key]

return

if value.startswith("!") and new_entry_dict[key] is None:
Expand Down Expand Up @@ -274,24 +322,43 @@ def has_missing_main(key: str) -> bool:
def dict_sort_key(keyval: Tuple[str, Any]) -> Tuple[int, str]:
    """
    Build the sort key for a ``(key, value)`` item of the config dict.

    The first tuple element is a rank, the second is the item's key, so
    items with equal rank are ordered by key:

    - String values starting with "@link:" or "!@link:" go last
      (rank 2), so links are handled after all other entries.
    - Other string values starting with "!" go first (rank 0).
    - Everything else, including non-string values, is in the middle
      (rank 1).

    Args:
        keyval: A ``(key, value)`` pair as produced by ``dict.items()``.

    Returns:
        A ``(rank, key)`` tuple usable as a ``sorted`` key.
    """
    value = keyval[1]
    if isinstance(value, str):
        # Links must be tested before the generic "!" check, because
        # "!@link:" also starts with "!".
        if value.startswith(("!@link:", "@link:")):
            return (2, keyval[0])  # Last
        if value.startswith("!"):
            return (0, keyval[0])  # First
    return (1, keyval[0])  # Middle

def remove_keys_matching_prefixes(d: dict, prefixes: list[str]) -> dict:
    """
    Return a copy of ``d`` without the keys that begin with a given prefix.

    Args:
        d (dict): The dictionary to filter; it is not modified.
        prefixes (list[str]): Prefixes to test each key against.

    Returns:
        dict: A new dictionary holding only the items whose key does not
        start with any of the prefixes.
    """
    # str.startswith accepts a tuple and is True if any member matches;
    # an empty tuple never matches, so all keys are kept in that case.
    blocked = tuple(prefixes)

    kept = {}
    for name, entry in d.items():
        if name.startswith(blocked):
            continue
        kept[name] = entry
    return kept

if callbacks is None:
# Use default callbacks if none are explicitly provided
callbacks = ParseJsonCallbacks()

optional_groups_to_remove: List[str] = []
optional_groups_to_remove_from_links: List[str] = []
new_entry_dict = {}

# Process '!...' keys first, but optional link last
Expand All @@ -317,6 +384,7 @@ def dict_sort_key(keyval: Tuple[str, Any]) -> Tuple[int, str]:
k,
v,
optional_groups_to_remove,
optional_groups_to_remove_from_links,
callbacks,
suppress_warning,
)
Expand All @@ -328,11 +396,15 @@ def dict_sort_key(keyval: Tuple[str, Any]) -> Tuple[int, str]:
key,
value,
optional_groups_to_remove,
optional_groups_to_remove_from_links,
callbacks,
suppress_warning,
)

return new_entry_dict
# This removes those groups that had a link with a "!" prefix
return remove_keys_matching_prefixes(
new_entry_dict, optional_groups_to_remove_from_links
)


class MultiFormatReader(BaseReader):
Expand Down

0 comments on commit f7e0853

Please sign in to comment.