From cb5b114671b9c271167d9629b01a1c8f4766b31c Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Sat, 17 Aug 2024 20:15:46 -0500 Subject: [PATCH 1/3] Use h5py_read_string() for reading string types Unfortunately, since h5py 3 came out, it no longer automatically converts string types to strings, and we have to do it manually. We need to use the `h5py_read_string()` compatibility function to do so. Signed-off-by: Patrick Avery --- hexrd/imageseries/load/eiger_stream_v1.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hexrd/imageseries/load/eiger_stream_v1.py b/hexrd/imageseries/load/eiger_stream_v1.py index 0b98d9ebd..188309a73 100644 --- a/hexrd/imageseries/load/eiger_stream_v1.py +++ b/hexrd/imageseries/load/eiger_stream_v1.py @@ -6,6 +6,7 @@ import h5py import numpy as np +from hexrd.utils.compatibility import h5py_read_string from hexrd.utils.hdf5 import unwrap_h5_to_dict from . import ImageSeriesAdapter @@ -116,7 +117,7 @@ def _first_data_entry(self): @property def dtype(self): - return self._first_data_entry['dtype'][()] + return h5py_read_string(self._first_data_entry['dtype']) @property def shape(self): From 47d5616564585140caa18484945142b4b7f95448 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Sat, 17 Aug 2024 20:10:28 -0500 Subject: [PATCH 2/3] Treat arrays with empty shapes as numbers Calling `tolist()` on these types of arrays yields a single number rather than a list. This would cause errors with the yaml dumper since it expected a list. Instead, we should just follow the route for single numbers and call `.item()`. This fixes the issue. Signed-off-by: Patrick Avery --- hexrd/utils/yaml.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hexrd/utils/yaml.py b/hexrd/utils/yaml.py index 88555ec41..41412aaf5 100644 --- a/hexrd/utils/yaml.py +++ b/hexrd/utils/yaml.py @@ -12,9 +12,13 @@ class NumpyToNativeDumper(yaml.SafeDumper): converted to a basic type. 
""" def represent_data(self, data): - if isinstance(data, np.ndarray): + # Empty shape arrays should be treated as numbers, not arrays. + is_empty_shape_array = ( + isinstance(data, np.ndarray) and data.shape == () + ) + if isinstance(data, np.ndarray) and not is_empty_shape_array: return self.represent_list(data.tolist()) - elif isinstance(data, (np.generic, np.number)): + elif isinstance(data, (np.generic, np.number)) or is_empty_shape_array: item = data.item() if isinstance(item, (np.generic, np.number)): # This means it was not converted successfully. From d9ba404d163210448db6d50076203e8f6ef0f9ca Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Sat, 17 Aug 2024 20:11:58 -0500 Subject: [PATCH 3/3] Save eiger metadata as a yaml string Because the numpy frame-cache format does not yet support nested metadata structures, we cannot save nested metadata as a frame-cache without causing errors. We have some potential solutions to support nested metadata on the horizon. Until that time, let's store the eiger stream metadata as a yaml string, so that sparse arrays can actually be created from it. The metadata is still present, it is just in the form of a yaml string. Signed-off-by: Patrick Avery --- hexrd/imageseries/load/eiger_stream_v1.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/hexrd/imageseries/load/eiger_stream_v1.py b/hexrd/imageseries/load/eiger_stream_v1.py index 188309a73..661cc236e 100644 --- a/hexrd/imageseries/load/eiger_stream_v1.py +++ b/hexrd/imageseries/load/eiger_stream_v1.py @@ -5,9 +5,11 @@ from dectris.compression import decompress import h5py import numpy as np +import yaml from hexrd.utils.compatibility import h5py_read_string from hexrd.utils.hdf5 import unwrap_h5_to_dict +from hexrd.utils.yaml import NumpyToNativeDumper from . 
import ImageSeriesAdapter from ..imageseriesiter import ImageSeriesIterator @@ -96,8 +98,22 @@ def _load_metadata(self): def _get_metadata(self): d = {} + # First, unwrap the metadata from the HDF5 file into a dict unwrap_h5_to_dict(self.__h5file['/metadata'], d) - return d + + # Because frame cache imageseries do not yet support nested + # metadata structures, we should not use them. The frame cache + # imageseries should, at some point, start allowing for nested + # metadata structures. At that point, we can return to the + # previous way. + # For now instead, we will serialize this nested structure to + # yaml and keep it as a string. This means the metadata is still + # saved and accessible. + metadata = { + 'eiger_metadata_as_yaml': yaml.dump(d, Dumper=NumpyToNativeDumper) + } + + return metadata @property def metadata(self):