From 0332e7096c19e9bea4b409516601542ca0769c9c Mon Sep 17 00:00:00 2001 From: Joey Kleingers Date: Mon, 29 Jul 2024 10:28:22 -0400 Subject: [PATCH] BUG FIX: PeregrineHDF5Reader file system path and Array Type Fixes (#1025) * BUG FIX: Ensure the path separators are correct for Windows Signed-off-by: Joey Kleingers * Update arrays to be created using the array data type from the HDF5 file. Signed-off-by: Joey Kleingers --------- Signed-off-by: Joey Kleingers --- .../ReadPeregrineHDF5File.py | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/wrapping/python/plugins/NXDataAnalysisToolkit/src/NXDataAnalysisToolkit/ReadPeregrineHDF5File.py b/wrapping/python/plugins/NXDataAnalysisToolkit/src/NXDataAnalysisToolkit/ReadPeregrineHDF5File.py index 9e8d4cf655..ee52e74ec9 100644 --- a/wrapping/python/plugins/NXDataAnalysisToolkit/src/NXDataAnalysisToolkit/ReadPeregrineHDF5File.py +++ b/wrapping/python/plugins/NXDataAnalysisToolkit/src/NXDataAnalysisToolkit/ReadPeregrineHDF5File.py @@ -303,14 +303,14 @@ def _preflight_slice_datasets(self, h5_file_reader: h5py.File, origin: List[floa return Result(errors=[nx.Error(-3001, 'The camera data datasets are empty. 
Please input the camera data dataset names that this filter should read from the input file, separated by commas.')]) for camera_data_dataset in camera_data_datasets: - camera_data_dataset_path = Path(camera_data_hdf5_parent_path) / camera_data_dataset + camera_data_dataset_path: Path = Path(camera_data_hdf5_parent_path) / camera_data_dataset if dims is None: - dims_result: Result[List[int]] = self._read_dataset_dimensions(h5_file_reader, str(camera_data_dataset_path)) + dims_result: Result[List[int]] = self._read_dataset_dimensions(h5_file_reader, camera_data_dataset_path.as_posix()) if dims_result.invalid(): return dims_result dims = dims_result.value else: - dims_result = self._validate_dataset_dimensions(h5_file_reader, str(camera_data_dataset_path), dims) + dims_result = self._validate_dataset_dimensions(h5_file_reader, camera_data_dataset_path.as_posix(), dims) if dims_result.invalid(): return Result(errors=dims_result.errors) @@ -370,23 +370,41 @@ def _preflight_slice_datasets(self, h5_file_reader: h5py.File, origin: List[floa if read_segmentation_results: for segmentation_result in segmentation_results_list: segmentation_result_path: nx.DataPath = slice_data_image_geom_path.create_child_path(slice_data_cell_attr_mat_name).create_child_path('Segmentation Result ' + segmentation_result) - actions.append_action(nx.CreateArrayAction(nx.DataType.uint8, subvolume_dims if read_slices_subvolume else dims, [1], segmentation_result_path)) + segmentation_result_h5_path = Path(ReadPeregrineHDF5File.SEGMENTATION_RESULTS_H5_PARENT_PATH) / segmentation_result + dset_type_result: Result = self._read_dataset_type(h5_file_reader, segmentation_result_h5_path.as_posix()) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), subvolume_dims if read_slices_subvolume else dims, [1], segmentation_result_path)) # Optionally create the camera data arrays 
if read_camera_data: for camera_data_dataset in camera_data_datasets: camera_data_dataset_path: nx.DataPath = slice_data_image_geom_path.create_child_path(slice_data_cell_attr_mat_name).create_child_path(f"Camera Data {camera_data_dataset}") - actions.append_action(nx.CreateArrayAction(nx.DataType.float32, subvolume_dims if read_slices_subvolume else dims, [1], camera_data_dataset_path)) + camera_data_dataset_h5_path: Path = Path(camera_data_hdf5_parent_path) / camera_data_dataset + dset_type_result: Result = self._read_dataset_type(h5_file_reader, camera_data_dataset_h5_path.as_posix()) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), subvolume_dims if read_slices_subvolume else dims, [1], camera_data_dataset_path)) # Optionally create the part ids data array if read_part_ids: part_ids_path: nx.DataPath = slice_data_image_geom_path.create_child_path(slice_data_cell_attr_mat_name).create_child_path(part_ids_array_name) - actions.append_action(nx.CreateArrayAction(nx.DataType.uint32, subvolume_dims if read_slices_subvolume else dims, [1], part_ids_path)) + dset_type_result: Result = self._read_dataset_type(h5_file_reader, ReadPeregrineHDF5File.PART_IDS_H5_PATH) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), subvolume_dims if read_slices_subvolume else dims, [1], part_ids_path)) # Optionally create the sample ids data array if read_sample_ids: sample_ids_path: nx.DataPath = slice_data_image_geom_path.create_child_path(slice_data_cell_attr_mat_name).create_child_path(sample_ids_array_name) - actions.append_action(nx.CreateArrayAction(nx.DataType.uint32, subvolume_dims if read_slices_subvolume else dims, [1], sample_ids_path)) + dset_type_result: Result = self._read_dataset_type(h5_file_reader, 
ReadPeregrineHDF5File.SAMPLE_IDS_H5_PATH) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), subvolume_dims if read_slices_subvolume else dims, [1], sample_ids_path)) return Result() @@ -451,11 +469,19 @@ def _preflight_registered_datasets(self, h5_file_reader: h5py.File, origin: List if read_anomaly_detection: anomaly_detection_path: nx.DataPath = registered_data_image_geom_path.create_child_path(registered_data_cell_attr_mat_name).create_child_path(anomaly_detection_array_name) - actions.append_action(nx.CreateArrayAction(nx.DataType.uint8, registered_dims, [1], anomaly_detection_path)) + dset_type_result: Result = self._read_dataset_type(h5_file_reader, ReadPeregrineHDF5File.REGISTERED_ANOMALY_DETECTION_H5_PATH) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), registered_dims, [1], anomaly_detection_path)) if read_x_ray_ct: xray_ct_path: nx.DataPath = registered_data_image_geom_path.create_child_path(registered_data_cell_attr_mat_name).create_child_path(xray_ct_array_name) - actions.append_action(nx.CreateArrayAction(nx.DataType.uint8, registered_dims, [1], xray_ct_path)) + dset_type_result: Result = self._read_dataset_type(h5_file_reader, ReadPeregrineHDF5File.REGISTERED_XRAY_CT_H5_PATH) + if dset_type_result.invalid(): + return dset_type_result + dset_type = dset_type_result.value + actions.append_action(nx.CreateArrayAction(nx.convert_np_dtype_to_datatype(dset_type), registered_dims, [1], xray_ct_path)) return Result() @@ -509,7 +535,7 @@ def _preflight_scan_datasets(self, h5_file_reader: h5py.File, filter_args: dict, num_edges: int = 0 for i in range(z_start, z_end): scan_path = Path(ReadPeregrineHDF5File.SCANS_GROUP_H5_PATH) / str(i) - scan_dims_result: Result[List[int]] = 
self._read_dataset_dimensions(h5_file_reader, str(scan_path)) + scan_dims_result: Result[List[int]] = self._read_dataset_dimensions(h5_file_reader, scan_path.as_posix()) if scan_dims_result.invalid(): return Result(errors=scan_dims_result.errors) scan_dims: List[int] = scan_dims_result.value @@ -544,6 +570,14 @@ def _read_dataset_dimensions(self, h5_file_reader: h5py.File, h5_dataset_path: s dataset: h5py.Dataset = result.value return Result(value=list(dataset.shape)) + + def _read_dataset_type(self, h5_file_reader: h5py.File, h5_dataset_path: str) -> Result[np.dtype]: + result: Result[h5py.Dataset] = self._open_hdf5_data_object(h5_file_reader, h5_dataset_path) + if result.invalid(): + return Result(errors=result.errors) + dataset: h5py.Dataset = result.value + + return Result(value=dataset.dtype) def _validate_dataset_dimensions(self, h5_file_reader: h5py.File, h5_dataset_path: str, sliceDims: List[int]) -> Result: dims_result = self._read_dataset_dimensions(h5_file_reader, h5_dataset_path) @@ -559,9 +593,9 @@ def _validate_dataset_dimensions(self, h5_file_reader: h5py.File, h5_dataset_pat def _read_slice_dimensions(self, h5_file_reader: h5py.File, segmentation_results_list: List[str]) -> Result[List[int]]: slice_dims: List[int] = [] for segmentation_result in segmentation_results_list: - segmentation_result_path = ReadPeregrineHDF5File.SEGMENTATION_RESULTS_H5_PARENT_PATH + '/' + segmentation_result + segmentation_result_path = Path(ReadPeregrineHDF5File.SEGMENTATION_RESULTS_H5_PARENT_PATH) / segmentation_result - dims_result: Result[List[int]] = self._read_dataset_dimensions(h5_file_reader, segmentation_result_path) + dims_result: Result[List[int]] = self._read_dataset_dimensions(h5_file_reader, segmentation_result_path.as_posix()) if dims_result.invalid(): return dims_result @@ -570,7 +604,7 @@ def _read_slice_dimensions(self, h5_file_reader: h5py.File, segmentation_results # Set the slice dimensions for the first time slice_dims = dims else: - result: Result = 
self._validate_dataset_dimensions(h5_file_reader, segmentation_result_path, slice_dims) + result: Result = self._validate_dataset_dimensions(h5_file_reader, segmentation_result_path.as_posix(), slice_dims) if result.invalid(): return Result(errors=result.errors) @@ -670,7 +704,7 @@ def _read_slice_datasets(self, h5_file_reader: h5py.File, data_structure: nx.Dat segmentation_result_nx = data_structure[segmentation_result_nx_path].npview() segmentation_result_nx = np.squeeze(segmentation_result_nx) segmentation_result_h5_path = Path(ReadPeregrineHDF5File.SEGMENTATION_RESULTS_H5_PARENT_PATH) / segmentation_result - segmentation_result_h5_result: Result[h5py.Dataset] = self._open_hdf5_data_object(h5_file_reader, str(segmentation_result_h5_path)) + segmentation_result_h5_result: Result[h5py.Dataset] = self._open_hdf5_data_object(h5_file_reader, segmentation_result_h5_path.as_posix()) if segmentation_result_h5_result.invalid(): return segmentation_result_h5_result segmentation_result_h5 = segmentation_result_h5_result.value @@ -692,7 +726,7 @@ def _read_slice_datasets(self, h5_file_reader: h5py.File, data_structure: nx.Dat message_handler(nx.IFilter.Message(nx.IFilter.Message.Type.Info, f'Reading Camera Dataset "{camera_data_dataset}"...')) camera_data_nx_path: nx.DataPath = slice_data_image_geom_path.create_child_path(slice_data_cell_attr_mat_name).create_child_path(f"Camera Data {camera_data_dataset}") camera_data_h5_path: Path = Path(camera_data_hdf5_parent_path) / camera_data_dataset - camera_data_h5_result: Result[h5py.Dataset] = self._open_hdf5_data_object(h5_file_reader, str(camera_data_h5_path)) + camera_data_h5_result: Result[h5py.Dataset] = self._open_hdf5_data_object(h5_file_reader, camera_data_h5_path.as_posix()) if camera_data_h5_result.invalid(): return Result(errors=camera_data_h5_result.errors) camera_data_h5 = camera_data_h5_result.value @@ -887,8 +921,8 @@ def _read_scan_datasets(self, h5_file_reader: h5py.File, data_structure: nx.Data # Read the scan 
data into memory as vertices and edges scan_path = Path(ReadPeregrineHDF5File.SCANS_GROUP_H5_PATH) / str(z) - message_handler(nx.IFilter.Message(nx.IFilter.Message.Type.Info, f"Reading Scan Dataset '{str(scan_path)}' ({z - z_start + 1}/{z_end - z_start + 1})...")) - scan_data_result: Result[Tuple[np.array, np.array, np.array]] = self._read_scan_data(h5_file_reader, str(scan_path), z * z_thickness) + message_handler(nx.IFilter.Message(nx.IFilter.Message.Type.Info, f"Reading Scan Dataset '{scan_path.as_posix()}' ({z - z_start + 1}/{z_end - z_start + 1})...")) + scan_data_result: Result[Tuple[np.array, np.array, np.array]] = self._read_scan_data(h5_file_reader, scan_path.as_posix(), z * z_thickness) if scan_data_result.invalid(): return scan_data_result vertices, edges, tot = scan_data_result.value