Skip to content

Commit

Permalink
Route array representation for HTML (#1206)
Browse files Browse the repository at this point in the history
* small patch to html repr

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* re-organize

* comment request

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* improve docstrings

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestions from code review

Co-authored-by: Oliver Ruebel <[email protected]>

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Steph Prince <[email protected]>
Co-authored-by: Ryan Ly <[email protected]>
Co-authored-by: Oliver Ruebel <[email protected]>
  • Loading branch information
5 people authored Nov 23, 2024
1 parent 4fb554a commit 1d3421e
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 17 deletions.
11 changes: 7 additions & 4 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,7 +1539,7 @@ def generate_dataset_html(dataset):

array_info_dict = get_basic_array_info(dataset)
if isinstance(dataset, h5py.Dataset):

dataset_type = "HDF5 dataset"
# get info from hdf5 dataset
compressed_size = dataset.id.get_storage_size()
if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
Expand All @@ -1554,10 +1554,13 @@ def generate_dataset_html(dataset):
"Compression opts": dataset.compression_opts,
"Compression ratio": compression_ratio,
}

array_info_dict.update(hdf5_info_dict)

# generate html repr
repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")
elif isinstance(dataset, np.ndarray):
dataset_type = "NumPy array"
else:
dataset_type = dataset.__class__.__name__

repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type)

return repr_html
32 changes: 25 additions & 7 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,11 @@ def _generate_field_html(self, key, value, level, access_code):
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
(hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array
# Detects array-like objects that conform to the Array Interface specification
# (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both
# 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'.
# This approach keeps the implementation generic without coupling it to backend-specific methods.
is_array_data = hasattr(value, "shape") and hasattr(value, "dtype")

if is_array_data:
html_content = self._generate_array_html(value, level + 1)
Expand All @@ -735,14 +738,29 @@ def _generate_field_html(self, key, value, level, access_code):


def _generate_array_html(self, array, level):
"""Generates HTML for array data"""
"""Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects)."""

read_io = self.get_read_io() # if the Container was read from file, get IO object
if read_io is not None: # Note that sometimes numpy array have a read_io attribute
repr_html = read_io.generate_dataset_html(array)
else:
is_numpy_array = isinstance(array, np.ndarray)
read_io = self.get_read_io()
it_was_read_with_io = read_io is not None
is_data_io = isinstance(array, DataIO)

if is_numpy_array:
array_info_dict = get_basic_array_info(array)
repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")
elif is_data_io:
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO")
elif it_was_read_with_io:
# The backend handles the representation here. Two special cases worth noting:
# 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays
# even when their parent container has an IO
# 2. Data may have been modified after being read from storage
repr_html = read_io.generate_dataset_html(array)
else: # Not sure which object could get here
object_class = array.__class__.__name__
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, object_class)

return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

Expand Down
20 changes: 14 additions & 6 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ def convert_bytes_to_str(bytes_size):

return basic_array_info_dict

def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
def generate_array_html_repr(array_info_dict, array, dataset_type=None):
def html_table(item_dicts) -> str:
"""
Generates an html table from a dictionary
Expand All @@ -912,14 +912,22 @@ def html_table(item_dicts) -> str:
report += "</table>"
return report

array_info_html = html_table(backend_info_dict)
array_info_html = html_table(array_info_dict)
repr_html = dataset_type + "<br>" + array_info_html if dataset_type is not None else array_info_html

if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
array_size = array.nbytes
# Array-like objects might lack nbytes (h5py < 3.0) or size (DataIO object)
if hasattr(array, "nbytes"):
array_size_bytes = array.nbytes
else:
array_size = array.size * array.dtype.itemsize
array_is_small = array_size < 1024 * 0.1 # 10 % a kilobyte to display the array
if hasattr(array, "size"):
array_size = array.size
else:
import math
array_size = math.prod(array.shape)
array_size_bytes = array_size * array.dtype.itemsize

# Heuristic for displaying data
array_is_small = array_size_bytes < 1024 * 0.1 # display the array only if it is smaller than 10% of a kilobyte
if array_is_small:
repr_html += "<br>" + str(np.asarray(array))

Expand Down

0 comments on commit 1d3421e

Please sign in to comment.