diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 66605409b..d30cef06c 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1539,7 +1539,7 @@ def generate_dataset_html(dataset): array_info_dict = get_basic_array_info(dataset) if isinstance(dataset, h5py.Dataset): - + dataset_type = "HDF5 dataset" # get info from hdf5 dataset compressed_size = dataset.id.get_storage_size() if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 @@ -1554,10 +1554,13 @@ def generate_dataset_html(dataset): "Compression opts": dataset.compression_opts, "Compression ratio": compression_ratio, } - array_info_dict.update(hdf5_info_dict) - # generate html repr - repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset") + elif isinstance(dataset, np.ndarray): + dataset_type = "NumPy array" + else: + dataset_type = dataset.__class__.__name__ + + repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type) return repr_html diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 864b34ee9..ce4e8b821 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -707,8 +707,11 @@ def _generate_field_html(self, key, value, level, access_code): return f'
{key}: {value}
' - is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \ - (hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array + # Detects array-like objects that conform to the Array Interface specification + # (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both + # 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'. + # This approach keeps the implementation generic without coupling to specific backends methods + is_array_data = hasattr(value, "shape") and hasattr(value, "dtype") if is_array_data: html_content = self._generate_array_html(value, level + 1) @@ -735,14 +738,29 @@ def _generate_field_html(self, key, value, level, access_code): def _generate_array_html(self, array, level): - """Generates HTML for array data""" + """Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects).""" - read_io = self.get_read_io() # if the Container was read from file, get IO object - if read_io is not None: # Note that sometimes numpy array have a read_io attribute - repr_html = read_io.generate_dataset_html(array) - else: + is_numpy_array = isinstance(array, np.ndarray) + read_io = self.get_read_io() + it_was_read_with_io = read_io is not None + is_data_io = isinstance(array, DataIO) + + if is_numpy_array: array_info_dict = get_basic_array_info(array) repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array") + elif is_data_io: + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO") + elif it_was_read_with_io: + # The backend handles the representation here. Two special cases worth noting: + # 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays + # even when their parent container has an IO + # 2. Data may have been modified after being read from storage + repr_html = read_io.generate_dataset_html(array) + else: # Not sure which object could get here + object_class = array.__class__.__name__ + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, object_class) return f'
{repr_html}
' diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 6b5900384..c21382a2a 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -894,7 +894,7 @@ def convert_bytes_to_str(bytes_size): return basic_array_info_dict -def generate_array_html_repr(backend_info_dict, array, dataset_type=None): +def generate_array_html_repr(array_info_dict, array, dataset_type=None): def html_table(item_dicts) -> str: """ Generates an html table from a dictionary @@ -912,14 +912,22 @@ def html_table(item_dicts) -> str: report += "" return report - array_info_html = html_table(backend_info_dict) + array_info_html = html_table(array_info_dict) repr_html = dataset_type + "
" + array_info_html if dataset_type is not None else array_info_html - if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 - array_size = array.nbytes + # Array like might lack nbytes (h5py < 3.0) or size (DataIO object) + if hasattr(array, "nbytes"): + array_size_bytes = array.nbytes else: - array_size = array.size * array.dtype.itemsize - array_is_small = array_size < 1024 * 0.1 # 10 % a kilobyte to display the array + if hasattr(array, "size"): + array_size = array.size + else: + import math + array_size = math.prod(array.shape) + array_size_bytes = array_size * array.dtype.itemsize + + # Heuristic for displaying data + array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array if array_is_small: repr_html += "
" + str(np.asarray(array))