
Commit

create tests/utils.py
magland committed Apr 4, 2024
1 parent 5f9274d commit c13a2d9
Showing 6 changed files with 120 additions and 147 deletions.
22 changes: 5 additions & 17 deletions tests/test_copy.py
@@ -1,9 +1,9 @@
import numpy as np
import h5py
import tempfile
import pytest
import lindi
from lindi import LindiH5ZarrStore
from utils import arrays_are_equal


def test_copy_dataset():
@@ -27,14 +27,14 @@ def test_copy_dataset():
h5f_2.copy("X", h5f_2, "Z")
assert "Z" in h5f_2
assert h5f_2["Z"].attrs['attr1'] == 'value1' # type: ignore
assert _arrays_are_equal(h5f["X"][()], h5f_2["Z"][()]) # type: ignore
assert arrays_are_equal(h5f["X"][()], h5f_2["Z"][()]) # type: ignore
rfs_copy = store.to_reference_file_system()
h5f_3 = lindi.LindiH5pyFile.from_reference_file_system(rfs_copy, mode="r+")
assert "Z" not in h5f_3
h5f_2.copy("X", h5f_3, "Z")
assert "Z" in h5f_3
assert h5f_3["Z"].attrs['attr1'] == 'value1' # type: ignore
assert _arrays_are_equal(h5f["X"][()], h5f_3["Z"][()]) # type: ignore
assert arrays_are_equal(h5f["X"][()], h5f_3["Z"][()]) # type: ignore


def test_copy_group():
@@ -61,27 +61,15 @@ def test_copy_group():
assert "Z" in h5f_2
assert h5f_2["Z"].attrs['attr1'] == 'value1' # type: ignore
assert "A" in h5f_2["Z"] # type: ignore
assert _arrays_are_equal(h5f["X/A"][()], h5f_2["Z/A"][()]) # type: ignore
assert arrays_are_equal(h5f["X/A"][()], h5f_2["Z/A"][()]) # type: ignore
rfs_copy = store.to_reference_file_system()
h5f_3 = lindi.LindiH5pyFile.from_reference_file_system(rfs_copy, mode="r+")
assert "Z" not in h5f_3
h5f_2.copy("X", h5f_3, "Z")
assert "Z" in h5f_3
assert h5f_3["Z"].attrs['attr1'] == 'value1'
assert "A" in h5f_3["Z"]
assert _arrays_are_equal(h5f["X/A"][()], h5f_3["Z/A"][()]) # type: ignore


def _arrays_are_equal(a, b):
if a.shape != b.shape:
return False
if a.dtype != b.dtype:
return False
# if this is numeric data we need to use allclose so that we can handle NaNs
if np.issubdtype(a.dtype, np.number):
return np.allclose(a, b, equal_nan=True)
else:
return np.array_equal(a, b)
assert arrays_are_equal(h5f["X/A"][()], h5f_3["Z/A"][()]) # type: ignore


if __name__ == '__main__':
63 changes: 20 additions & 43 deletions tests/test_core.py
@@ -4,6 +4,7 @@
import tempfile
import lindi
from lindi import LindiH5ZarrStore
from utils import arrays_are_equal, lists_are_equal


def test_variety():
@@ -33,12 +34,12 @@ def test_variety():
assert h5f_2.attrs["float1"] == h5f.attrs["float1"]
assert h5f_2.attrs["str1"] == h5f.attrs["str1"]
assert h5f_2.attrs["bytes1"] == h5f.attrs["bytes1"]
assert _lists_are_equal(h5f_2.attrs["list1"], h5f.attrs["list1"])
assert _lists_are_equal(h5f_2.attrs["tuple1"], h5f.attrs["tuple1"])
assert _arrays_are_equal(np.array(h5f_2.attrs["array1"]), h5f.attrs["array1"])
assert lists_are_equal(h5f_2.attrs["list1"], h5f.attrs["list1"])
assert lists_are_equal(h5f_2.attrs["tuple1"], h5f.attrs["tuple1"])
assert arrays_are_equal(np.array(h5f_2.attrs["array1"]), h5f.attrs["array1"])
assert h5f_2["dataset1"].attrs["test_attr1"] == h5f["dataset1"].attrs["test_attr1"] # type: ignore
assert h5f_2["dataset1"].id
assert _arrays_are_equal(h5f_2["dataset1"][()], h5f["dataset1"][()]) # type: ignore
assert arrays_are_equal(h5f_2["dataset1"][()], h5f["dataset1"][()]) # type: ignore
assert h5f_2["group1"].attrs["test_attr2"] == h5f["group1"].attrs["test_attr2"] # type: ignore
target_1 = h5f[h5f.attrs["dataset1_ref"]]
target_2 = h5f_2[h5f_2.attrs["dataset1_ref"]]
@@ -85,17 +86,17 @@ def test_soft_links():
assert isinstance(ds1, h5py.Dataset)
ds2 = h5f_2['soft_link']['dataset1'] # type: ignore
assert isinstance(ds2, h5py.Dataset)
assert _arrays_are_equal(ds1[()], ds2[()])
assert arrays_are_equal(ds1[()], ds2[()])
ds1 = h5f['soft_link/dataset1']
assert isinstance(ds1, h5py.Dataset)
ds2 = h5f_2['soft_link/dataset1']
assert isinstance(ds2, h5py.Dataset)
assert _arrays_are_equal(ds1[()], ds2[()])
assert arrays_are_equal(ds1[()], ds2[()])
ds1 = h5f['group_target/dataset1']
assert isinstance(ds1, h5py.Dataset)
ds2 = h5f_2['group_target/dataset1']
assert isinstance(ds2, h5py.Dataset)
assert _arrays_are_equal(ds1[()], ds2[()])
assert arrays_are_equal(ds1[()], ds2[()])


def test_arrays_of_compound_dtype():
@@ -118,16 +119,16 @@ def test_arrays_of_compound_dtype():
ds1_2 = h5f_2['dataset1']
assert isinstance(ds1_2, h5py.Dataset)
assert ds1_1.dtype == ds1_2.dtype
assert _arrays_are_equal(ds1_1['x'][()], ds1_2['x'][()]) # type: ignore
assert _arrays_are_equal(ds1_1['y'][()], ds1_2['y'][()]) # type: ignore
assert arrays_are_equal(ds1_1['x'][()], ds1_2['x'][()]) # type: ignore
assert arrays_are_equal(ds1_1['y'][()], ds1_2['y'][()]) # type: ignore
ds2_1 = h5f['dataset2']
assert isinstance(ds2_1, h5py.Dataset)
ds2_2 = h5f_2['dataset2']
assert isinstance(ds2_2, h5py.Dataset)
assert ds2_1.dtype == ds2_2.dtype
assert _arrays_are_equal(ds2_1['a'][()], ds2_2['a'][()]) # type: ignore
assert _arrays_are_equal(ds2_1['b'][()], ds2_2['b'][()]) # type: ignore
assert _arrays_are_equal(ds2_1['c'][()], ds2_2['c'][()]) # type: ignore
assert arrays_are_equal(ds2_1['a'][()], ds2_2['a'][()]) # type: ignore
assert arrays_are_equal(ds2_1['b'][()], ds2_2['b'][()]) # type: ignore
assert arrays_are_equal(ds2_1['c'][()], ds2_2['c'][()]) # type: ignore
ds3_1 = h5f['dataset3']
assert isinstance(ds3_1, h5py.Dataset)
ds3_2 = h5f_2['dataset3']
@@ -157,7 +158,7 @@ def test_arrays_of_compound_dtype_with_references():
ds1_2 = h5f_2['dataset1']
assert isinstance(ds1_2, h5py.Dataset)
assert ds1_1.dtype == ds1_2.dtype
assert _arrays_are_equal(ds1_1['x'][()], ds1_2['x'][()]) # type: ignore
assert arrays_are_equal(ds1_1['x'][()], ds1_2['x'][()]) # type: ignore
ref1 = ds1_1['y'][0]
ref2 = ds1_2['y'][0]
assert isinstance(ref1, h5py.Reference)
@@ -166,7 +167,7 @@ def test_arrays_of_compound_dtype_with_references():
assert isinstance(target1, h5py.Dataset)
target2 = h5f_2[ref2]
assert isinstance(target2, h5py.Dataset)
assert _arrays_are_equal(target1[()], target2[()])
assert arrays_are_equal(target1[()], target2[()])


def test_scalar_arrays():
@@ -219,7 +220,7 @@ def test_arrays_of_strings():
assert isinstance(X1, h5py.Dataset)
X2 = h5f_2['X']
assert isinstance(X2, h5py.Dataset)
assert _lists_are_equal(X1[:].tolist(), [x.encode() for x in X2[:]]) # type: ignore
assert lists_are_equal(X1[:].tolist(), [x.encode() for x in X2[:]]) # type: ignore


def test_numpy_arrays():
@@ -273,13 +274,13 @@ def test_nan_inf_attributes():
assert isinstance(nanval, float) and np.isnan(nanval)
assert X1.attrs["inf"] == np.inf
assert X1.attrs["ninf"] == -np.inf
assert _lists_are_equal(X1.attrs['float_list'], [np.nan, np.inf, -np.inf, 23])
assert lists_are_equal(X1.attrs['float_list'], [np.nan, np.inf, -np.inf, 23])

nanval = X2.attrs["nan"]
assert isinstance(nanval, float) and np.isnan(nanval)
assert X2.attrs["inf"] == np.inf
assert X2.attrs["ninf"] == -np.inf
assert _lists_are_equal(X2.attrs['float_list'], [np.nan, np.inf, -np.inf, 23])
assert lists_are_equal(X2.attrs['float_list'], [np.nan, np.inf, -np.inf, 23])

for test_string in ["NaN", "Infinity", "-Infinity", "Not-illegal"]:
filename = f"{tmpdir}/illegal_string.h5"
@@ -307,7 +308,7 @@ def test_reference_file_system_to_file():
client = lindi.LindiH5pyFile.from_reference_file_system(rfs_fname)
X = client["X"]
assert isinstance(X, lindi.LindiH5pyDataset)
assert _lists_are_equal(X[()], [1, 2, 3])
assert lists_are_equal(X[()], [1, 2, 3])


def test_lindi_reference_file_system_store():
@@ -483,31 +484,7 @@ def test_numpy_array_of_byte_strings():
assert isinstance(X1, h5py.Dataset)
X2 = h5f_2['X']
assert isinstance(X2, h5py.Dataset)
assert _lists_are_equal(X1[:].tolist(), X2[:].tolist()) # type: ignore


def _lists_are_equal(a, b):
if len(a) != len(b):
return False
for aa, bb in zip(a, b):
if aa != bb:
if np.isnan(aa) and np.isnan(bb):
# nan != nan, but we want to consider them equal
continue
return False
return True


def _arrays_are_equal(a, b):
if a.shape != b.shape:
return False
if a.dtype != b.dtype:
return False
# if this is numeric data we need to use allclose so that we can handle NaNs
if np.issubdtype(a.dtype, np.number):
return np.allclose(a, b, equal_nan=True)
else:
return np.array_equal(a, b)
assert lists_are_equal(X1[:].tolist(), X2[:].tolist()) # type: ignore


if __name__ == '__main__':
6 changes: 3 additions & 3 deletions tests/test_remote_data.py
@@ -1,7 +1,7 @@
import json
import pytest
import lindi
from test_core import _arrays_are_equal
from utils import arrays_are_equal


@pytest.mark.network
@@ -73,7 +73,7 @@ def test_remote_data_rfs_copy():

ds2 = client2['copied_data1']
assert isinstance(ds2, lindi.LindiH5pyDataset)
assert _arrays_are_equal(ds[()], ds2[()]) # make sure the data is the same
assert arrays_are_equal(ds[()], ds2[()]) # make sure the data is the same

# This next dataset has an _EXTERNAL_ARRAY_LINK which means it has a pointer
# to a dataset in a remote h5py
@@ -88,7 +88,7 @@ def test_remote_data_rfs_copy():

ds2 = client2['copied_data2']
assert isinstance(ds2, lindi.LindiH5pyDataset)
assert _arrays_are_equal(ds[100000:100010], ds2[100000:100010])
assert arrays_are_equal(ds[100000:100010], ds2[100000:100010])


if __name__ == "__main__":
23 changes: 8 additions & 15 deletions tests/test_store.py
@@ -1,6 +1,7 @@
import h5py
import tempfile
import lindi
from utils import lists_are_equal


def test_store():
@@ -14,11 +15,11 @@ def test_store():
with lindi.LindiH5ZarrStore.from_file(filename, url=filename) as store:
store.to_file(f"{tmpdir}/test.zarr.json") # for coverage
a = store.listdir('')
assert _lists_are_equal(a, ['dataset1', 'group1'], ordered=False)
assert _lists_are_equal_as_sets(a, ['dataset1', 'group1'])
b = store.listdir('group1')
assert _lists_are_equal(b, ['group2', 'dataset2'], ordered=False)
assert _lists_are_equal_as_sets(b, ['group2', 'dataset2'])
c = store.listdir('group1/group2')
assert _lists_are_equal(c, [], ordered=False)
assert _lists_are_equal_as_sets(c, [])
assert '.zattrs' in store
assert '.zgroup' in store
assert 'dataset1' not in store
@@ -41,18 +42,10 @@ def test_store():
assert 'group1/dataset2/0' in store
client = lindi.LindiH5pyFile.from_zarr_store(store)
X = client["dataset1"][:] # type: ignore
assert _lists_are_equal(X, [1, 2, 3], ordered=True)
assert lists_are_equal(X, [1, 2, 3])
Y = client["group1/dataset2"][:] # type: ignore
assert _lists_are_equal(Y, [4, 5, 6], ordered=True)
assert lists_are_equal(Y, [4, 5, 6])


def _lists_are_equal(a, b, ordered: bool):
if ordered:
if len(a) != len(b):
return False
for i in range(len(a)):
if a[i] != b[i]:
return False
return True
else:
return set(a) == set(b)
def _lists_are_equal_as_sets(a, b):
return set(a) == set(b)
71 changes: 2 additions & 69 deletions tests/test_zarr_write.py
@@ -1,11 +1,10 @@
from typing import Union
import tempfile
import numpy as np
import zarr
import h5py
import lindi
from lindi.conversion.attr_conversion import h5_to_zarr_attr
import pytest
from utils import assert_groups_equal


def test_zarr_write():
@@ -77,73 +76,7 @@ def compare_example_h5_data(h5f: h5py.File, tmpdir: str):
with h5py.File(f'{tmpdir}/for_comparison.h5', 'w') as h5f2:
write_example_h5_data(h5f2)
with h5py.File(f'{tmpdir}/for_comparison.h5', 'r') as h5f2:
_assert_groups_equal(h5f, h5f2)


def _assert_groups_equal(h5f: h5py.Group, h5f2: h5py.Group):
print(f'Comparing groups: {h5f.name}')
_assert_attrs_equal(h5f, h5f2)
for k in h5f.keys():
X1 = h5f[k]
X2 = h5f2[k]
if isinstance(X1, h5py.Group):
assert isinstance(X2, h5py.Group)
_assert_groups_equal(X1, X2)
elif isinstance(X1, h5py.Dataset):
assert isinstance(X2, h5py.Dataset)
_assert_datasets_equal(X1, X2)
else:
raise Exception(f'Unexpected type: {type(X1)}')

for k in h5f2.keys():
if k not in h5f:
raise Exception(f'Key {k} not found in h5f')


def _assert_datasets_equal(h5d1: h5py.Dataset, h5d2: h5py.Dataset):
print(f'Comparing datasets: {h5d1.name}')
assert h5d1.shape == h5d2.shape, f'h5d1.shape: {h5d1.shape}, h5d2.shape: {h5d2.shape}'
assert h5d1.dtype == h5d2.dtype, f'h5d1.dtype: {h5d1.dtype}, h5d2.dtype: {h5d2.dtype}'
if h5d1.dtype.kind == 'V':
for name in h5d1.dtype.names:
data1 = h5d1[name][()]
data2 = h5d2[name][()]
assert _arrays_are_equal(data1, data2), f'data1: {data1}, data2: {data2}'
else:
data1 = h5d1[()]
data2 = h5d2[()]
assert _arrays_are_equal(data1, data2), f'data1: {data1}, data2: {data2}'


def _arrays_are_equal(a, b):
if a.shape != b.shape:
return False
if a.dtype != b.dtype:
return False
# if this is numeric data we need to use allclose so that we can handle NaNs
if np.issubdtype(a.dtype, np.number):
return np.allclose(a, b, equal_nan=True)
else:
return np.array_equal(a, b)


def _assert_attrs_equal(
h5f1: Union[h5py.Group, h5py.Dataset],
h5f2: Union[h5py.Group, h5py.Dataset]
):
attrs1 = h5f1.attrs
attrs2 = h5f2.attrs
keys1 = set(attrs1.keys())
keys2 = set(attrs2.keys())
assert keys1 == keys2, f'keys1: {keys1}, keys2: {keys2}'
for k1, v1 in attrs1.items():
_assert_attr_equal(v1, attrs2[k1])


def _assert_attr_equal(v1, v2):
v1_normalized = h5_to_zarr_attr(v1, h5f=None)
v2_normalized = h5_to_zarr_attr(v2, h5f=None)
assert v1_normalized == v2_normalized, f'v1_normalized: {v1_normalized}, v2_normalized: {v2_normalized}'
assert_groups_equal(h5f, h5f2)


if __name__ == '__main__':
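The sixth changed file is the newly created tests/utils.py, whose diff did not load on this page. Judging from the helper functions deleted from the other test modules and the names those modules now import (arrays_are_equal, lists_are_equal, assert_groups_equal), the new module presumably consolidates the removed helpers roughly as sketched below. This is a reconstruction, not the verbatim file; in particular, the underscore-free names of the internal helpers assert_datasets_equal, assert_attrs_equal, and assert_attr_equal are assumptions.

# Hypothetical reconstruction of tests/utils.py, assembled from the helpers
# removed elsewhere in this commit; not the verbatim new file.
from typing import Union

import h5py
import numpy as np

from lindi.conversion.attr_conversion import h5_to_zarr_attr


def arrays_are_equal(a, b):
    if a.shape != b.shape:
        return False
    if a.dtype != b.dtype:
        return False
    # if this is numeric data we need to use allclose so that we can handle NaNs
    if np.issubdtype(a.dtype, np.number):
        return np.allclose(a, b, equal_nan=True)
    else:
        return np.array_equal(a, b)


def lists_are_equal(a, b):
    if len(a) != len(b):
        return False
    for aa, bb in zip(a, b):
        if aa != bb:
            if np.isnan(aa) and np.isnan(bb):
                # nan != nan, but we want to consider them equal
                continue
            return False
    return True


def assert_groups_equal(h5f: h5py.Group, h5f2: h5py.Group):
    print(f'Comparing groups: {h5f.name}')
    assert_attrs_equal(h5f, h5f2)
    for k in h5f.keys():
        X1 = h5f[k]
        X2 = h5f2[k]
        if isinstance(X1, h5py.Group):
            assert isinstance(X2, h5py.Group)
            assert_groups_equal(X1, X2)
        elif isinstance(X1, h5py.Dataset):
            assert isinstance(X2, h5py.Dataset)
            assert_datasets_equal(X1, X2)
        else:
            raise Exception(f'Unexpected type: {type(X1)}')

    for k in h5f2.keys():
        if k not in h5f:
            raise Exception(f'Key {k} not found in h5f')


def assert_datasets_equal(h5d1: h5py.Dataset, h5d2: h5py.Dataset):
    print(f'Comparing datasets: {h5d1.name}')
    assert h5d1.shape == h5d2.shape, f'h5d1.shape: {h5d1.shape}, h5d2.shape: {h5d2.shape}'
    assert h5d1.dtype == h5d2.dtype, f'h5d1.dtype: {h5d1.dtype}, h5d2.dtype: {h5d2.dtype}'
    if h5d1.dtype.kind == 'V':
        for name in h5d1.dtype.names:
            data1 = h5d1[name][()]
            data2 = h5d2[name][()]
            assert arrays_are_equal(data1, data2), f'data1: {data1}, data2: {data2}'
    else:
        data1 = h5d1[()]
        data2 = h5d2[()]
        assert arrays_are_equal(data1, data2), f'data1: {data1}, data2: {data2}'


def assert_attrs_equal(
    h5f1: Union[h5py.Group, h5py.Dataset],
    h5f2: Union[h5py.Group, h5py.Dataset]
):
    attrs1 = h5f1.attrs
    attrs2 = h5f2.attrs
    keys1 = set(attrs1.keys())
    keys2 = set(attrs2.keys())
    assert keys1 == keys2, f'keys1: {keys1}, keys2: {keys2}'
    for k1, v1 in attrs1.items():
        assert_attr_equal(v1, attrs2[k1])


def assert_attr_equal(v1, v2):
    v1_normalized = h5_to_zarr_attr(v1, h5f=None)
    v2_normalized = h5_to_zarr_attr(v2, h5f=None)
    assert v1_normalized == v2_normalized, f'v1_normalized: {v1_normalized}, v2_normalized: {v2_normalized}'

Centralizing these helpers removes the near-identical copies of _arrays_are_equal and _lists_are_equal scattered across the test modules and gives every test a single NaN-aware equality check to import.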
