Skip to content

Commit

Permalink
Add working validation
Browse files: browse the repository at this point in the history
  • Loading branch information
domna committed Jun 12, 2024
1 parent 470bd7d commit 7fcaadb
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 17 deletions.
5 changes: 1 addition & 4 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
elif log_type == ValidationProblem.MissingRequiredGroup:
logger.warning(f"The required group, {path}, hasn't been supplied.")
elif log_type == ValidationProblem.MissingRequiredField:
logger.warning(
f"The data entry corresponding to {path} is required "
"and hasn't been supplied by the reader.",
)
logger.warning(f"Missing field: {path}")
elif log_type == ValidationProblem.InvalidType:
logger.warning(
f"The value at {path} should be one of: {value}"
Expand Down
3 changes: 3 additions & 0 deletions src/pynxtools/dataconverter/nexus_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ def get_path(self) -> str:
while current_node.parent is not None:
names.insert(0, current_node.name)
current_node = current_node.parent

if self.type == "attribute" and names:
names[-1] = f"@{names[-1]}"
return "/" + "/".join(names)

def search_child_with_name(
Expand Down
45 changes: 32 additions & 13 deletions src/pynxtools/dataconverter/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def validate_hdf_group_against(appdef: str, data: h5py.Group) -> bool:
# Allow for 10000 cache entries. This should be enough for most cases
@cached(
cache=LRUCache(maxsize=10000),
key=lambda path, *_: hashkey(path),
key=lambda path, node_type=None, nx_class=None: hashkey(path),
)
def find_node_for(
path: str, node_type: Optional[str] = None, nx_class: Optional[str] = None
Expand Down Expand Up @@ -107,7 +107,9 @@ def remove_from_req_fields(path: str):
required_fields.remove(path)

def handle_group(path: str, data: h5py.Group):
node = find_node_for(path, data.attrs.get("NX_class"))
node = find_node_for(
path, node_type="group", nx_class=data.attrs.get("NX_class")
)
if node is None:
collector.collect_and_log(
path, ValidationProblem.MissingDocumentation, None
Expand All @@ -123,24 +125,39 @@ def handle_field(path: str, data: h5py.Dataset):
path, ValidationProblem.MissingDocumentation, None
)
return
remove_from_req_fields(path)
remove_from_req_fields(node.get_path())
is_valid_data_field(data[()], node.dtype, path)

units = data.attrs.get("units")
if node.unit is not None:
if units is None:
collector.collect_and_log(
f"{path}/@units", ValidationProblem.MissingUnit, node.unit
)
return
remove_from_req_fields(f"{node.get_path()}/@units")
is_valid_unit(units, node.unit, None)
elif units is not None:
collector.collect_and_log(
f"{entry_name}/{path}/@units",
ValidationProblem.MissingDocumentation,
path,
)

def handle_attributes(path: str, attrs: h5py.AttributeManager):
for attr_name in attrs:
if attr_name in ("NX_class", "units"):
# Ignore special attrs
continue

node = find_node_for(f"{path}/{attr_name}")
if node is None:
collector.collect_and_log(
path, ValidationProblem.MissingDocumentation, None
f"{path}/@{attr_name}", ValidationProblem.MissingDocumentation, None
)
continue
remove_from_req_fields(f"{path}/@{attr_name}")
is_valid_data_field(
attrs.get(attr_name), node.dtype, f"{path}/@{attr_name}"
)

if attr_name == "units":
is_valid_unit(attrs.get(attr_name), node.units, None)
remove_from_req_fields(node.get_path())
is_valid_data_field(attrs.get(attr_name), node.dtype, node.get_path())

def validate(path: str, data: Union[h5py.Group, h5py.Dataset]):
# Namefit name against tree (use recursive caching)
Expand All @@ -151,8 +168,10 @@ def validate(path: str, data: Union[h5py.Group, h5py.Dataset]):

handle_attributes(path, data.attrs)

tree = generate_tree_from(appdef).search_child_with_name("ENTRY")
required_fields = tree.required_fields_and_attrs_names()
appdef = generate_tree_from(appdef)
required_fields = appdef.required_fields_and_attrs_names()
tree = appdef.search_child_with_name("ENTRY")
entry_name = data.name
data.visititems(validate)

for req_field in required_fields:
Expand Down

0 comments on commit 7fcaadb

Please sign in to comment.