Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional link in multireader #472

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 109 additions & 15 deletions src/pynxtools/dataconverter/readers/multi/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def resolve_special_keys(
key: str,
value: Any,
optional_groups_to_remove: List[str],
optional_groups_to_remove_from_links: List[str],
callbacks: ParseJsonCallbacks,
suppress_warning: bool = False,
) -> None:
Expand Down Expand Up @@ -188,6 +189,42 @@ def parse_config_value(value: str) -> Tuple[str, Any]:
return ("", value)
return prefixes[0]

def extract_inner_path(key):
    """
    Convert a Nexus concept path into a normalized HDF5-style target path.

    For every path segment, the name inside square brackets is kept when
    present. Text outside of brackets is kept only if it is not entirely
    upper case (all-upper-case concept names such as ``GROUP`` are dropped).
    The kept parts are joined with "/", so the result never starts with a
    leading slash.

    Args:
        key (str): The Nexus concept path, containing bracketed
            sections for variable parts.

    Returns:
        str: A normalized HDF5 target path.

    Example:
        For a Nexus path like
        'ENTRY[entry_name]/GROUP[group]/DATA[data]/attribute',
        the function will return a normalized HDF5 path like
        'entry_name/group/data/attribute'.
    """
    # Drop a literal leading "ENTRY" only. ``str.lstrip("ENTRY")`` would
    # strip any leading run of the characters E, N, T, R, Y and thereby
    # mangle names such as "Energy" -> "nergy".
    if key.startswith("ENTRY"):
        key = key[len("ENTRY"):]

    # Each match is a (bracketed, plain) pair in which exactly one element
    # is non-empty: either the content of "[...]" or a plain path segment.
    parts = re.findall(r"\[([^\]]+)\]|([^/\[]+)", key)

    return "/".join(
        bracketed or plain
        for bracketed, plain in parts
        if bracketed or not plain.isupper()
    )

# Handle non-keyword values
if not isinstance(value, str) or "@" not in str(value):
new_entry_dict[key] = value
Expand All @@ -212,12 +249,38 @@ def parse_config_value(value: str) -> Tuple[str, Any]:
if new_entry_dict[key] is not None:
break

if isinstance(new_entry_dict[key], dict) and "link" in new_entry_dict[key]:
keys_as_hdf5_paths = {extract_inner_path(key) for key in new_entry_dict.keys()}

link_target = new_entry_dict[key]["link"]

if link_target.lstrip("/") not in keys_as_hdf5_paths:
if value.startswith("!"):
group_to_delete = key.rsplit("/", 1)[0]
if not suppress_warning:
logger.info(
f"Main element {key} not provided (broken link at {link_target}). "
f"Removing the parent group {group_to_delete}."
)
optional_groups_to_remove_from_links.append(group_to_delete)

if not suppress_warning:
logger.info(
f"There was no target at {link_target} "
f"for the optional link defined for {key}. "
f"Removing the link."
)
del new_entry_dict[key]

return

if value.startswith("!") and new_entry_dict[key] is None:
group_to_delete = key.rsplit("/", 1)[0]
logger.info(
f"Main element {key} not provided. "
f"Removing the parent group {group_to_delete}."
)
if not suppress_warning:
logger.info(
f"Main element {key} not provided. "
f"Removing the parent group {group_to_delete}."
)
optional_groups_to_remove.append(group_to_delete)
return

Expand Down Expand Up @@ -256,28 +319,54 @@ def has_missing_main(key: str) -> bool:
return True
return False

def dict_sort_key(keyval: Tuple[str, Any]) -> Tuple[int, str]:
    """
    Build the sort key for a ``(key, value)`` item of the config dict.

    The first tuple element is a priority class, the second is the dict key,
    so items sharing a priority are ordered alphabetically by their key:

    - Values starting with "!" but not "!@link:" go first (priority 0).
    - All other values, including non-string values, go in the middle
      (priority 1).
    - Values starting with "@link:" or "!@link:" go last (priority 2).
      NOTE(review): presumably links are handled last so that their targets
      have already been processed when the link is checked - confirm.

    Args:
        keyval: A ``(key, value)`` pair as produced by ``dict.items()``.

    Returns:
        A ``(priority, key)`` tuple usable as a ``sorted`` key.
    """
    key, value = keyval
    if isinstance(value, str):
        # The link check must come before the generic "!" check because
        # "!@link:" also starts with "!".
        if value.startswith(("!@link:", "@link:")):
            return (2, key)  # Last
        if value.startswith("!"):
            return (0, key)  # First
    return (1, key)  # Middle

def remove_keys_matching_prefixes(d: dict, prefixes: list[str]) -> dict:
    """
    Return a copy of ``d`` without the keys that begin with a given prefix.

    Matching is a plain string-prefix test (``str.startswith``), so a prefix
    such as "/a/b" also matches the key "/a/bc". The input dictionary is not
    modified.

    Args:
        d (dict): The original dictionary.
        prefixes (list[str]): Prefixes whose matching keys are dropped.

    Returns:
        dict: A new dictionary containing only the non-matching items.
    """
    # str.startswith accepts a tuple of candidates; with no prefixes at all
    # nothing can match, so every item is kept.
    blocked = tuple(prefixes)
    kept = {}
    for name, payload in d.items():
        if blocked and name.startswith(blocked):
            continue
        kept[name] = payload
    return kept

if callbacks is None:
# Use default callbacks if none are explicitly provided
callbacks = ParseJsonCallbacks()

optional_groups_to_remove: List[str] = []
optional_groups_to_remove_from_links: List[str] = []
new_entry_dict = {}

# Process '!...' keys first, but optional links last
sorted_keys = dict(sorted(config_dict.items(), key=dict_sort_key))

for entry_name in entry_names:
callbacks.entry_name = entry_name

# Process '!...' keys first
sorted_keys = dict(sorted(config_dict.items(), key=dict_sort_key))
for key in sorted_keys:
value = config_dict[key]
key = key.replace("/ENTRY/", f"/ENTRY[{entry_name}]/")
Expand All @@ -295,6 +384,7 @@ def dict_sort_key(keyval: Tuple[str, Any]) -> bool:
k,
v,
optional_groups_to_remove,
optional_groups_to_remove_from_links,
callbacks,
suppress_warning,
)
Expand All @@ -306,11 +396,15 @@ def dict_sort_key(keyval: Tuple[str, Any]) -> bool:
key,
value,
optional_groups_to_remove,
optional_groups_to_remove_from_links,
callbacks,
suppress_warning,
)

return new_entry_dict
# This removes those groups that had a link with a "!" prefix
return remove_keys_matching_prefixes(
new_entry_dict, optional_groups_to_remove_from_links
)


class MultiFormatReader(BaseReader):
Expand Down
Loading