From f74ef5998bc2092a3c35739761720e0e105e61ae Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Tue, 19 Mar 2024 16:56:41 -0400 Subject: [PATCH] fix refs --- devel/test_load_pynwb.py | 44 ++++++++++------------ lindi/LindiH5ZarrStore/LindiH5ZarrStore.py | 2 +- lindi/LindiH5pyFile/LindiH5pyDataset.py | 10 ++++- lindi/LindiH5pyFile/LindiH5pyFile.py | 23 ++++++++++- lindi/LindiH5pyFile/LindiH5pyGroup.py | 8 +--- tests/test_with_real_data.py | 4 ++ 6 files changed, 57 insertions(+), 34 deletions(-) diff --git a/devel/test_load_pynwb.py b/devel/test_load_pynwb.py index e3ab17b..709e2c1 100644 --- a/devel/test_load_pynwb.py +++ b/devel/test_load_pynwb.py @@ -5,11 +5,12 @@ import h5py import lindi import json +import remfile def test_load_pynwb(): # https://neurosift.app/?p=/nwb&dandisetId=000939&dandisetVersion=0.240318.1555&url=https://api.dandiarchive.org/api/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/download/ - # url_nwb = "https://api.dandiarchive.org/api/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/download/" + url_nwb = "https://api.dandiarchive.org/api/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/download/" url = "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" thisdir = os.path.dirname(os.path.abspath(__file__)) @@ -17,32 +18,20 @@ def test_load_pynwb(): if not os.path.exists(fname): _download_file(url, fname) - # remf = remfile.File(url_nwb) - # h5f0 = h5py.File(remf, mode="r") - h5f0 = h5py.File("/home/magland/test.nwb", mode="r") + remf = remfile.File(url_nwb) + h5f0 = h5py.File(remf, mode="r") h5f = lindi.LindiH5pyFile.from_h5py_file(h5f0) - with pynwb.NWBHDF5IO(file=h5f, mode="r") as io: - nwb = io.read() - print(nwb) - for k in nwb.fields: - print( - f"________________________________ {k} __________________________________" - ) - print(getattr(nwb, k)) - print("-------------------------------------------") - store = lindi.LindiH5ZarrStore.from_file( - "/home/magland/test.nwb", url="/home/magland/test.nwb" - ) + store = lindi.LindiH5ZarrStore.from_file(url_nwb, url=url_nwb) rfs = store.to_reference_file_system() with open("test_rfs.zarr.json", "w") as f: json.dump(rfs, f, indent=2) hf5_rfs = lindi.LindiH5pyFile.from_reference_file_system(rfs) - _compare_h5py_files(h5f0, hf5_rfs) + _compare_h5py_files(h5f, hf5_rfs) - with pynwb.NWBHDF5IO(file=hf5_rfs, mode="r") as io: - nwb = io.read() + with pynwb.NWBHDF5IO(file=hf5_rfs, mode="r") as io1: + nwb = io1.read() print(nwb) for k in nwb.fields: print( @@ -115,25 +104,25 @@ def _compare_h5py_groups(g1: h5py.Group, g2: h5py.Group, label: str): if isinstance(obj1, h5py.Group): obj1x = g1.get(k, getlink=True) obj2x = g2.get(k, getlink=True) - if isinstance(obj1x, h5py.SoftLink): - if isinstance(obj2x, lindi.LindiH5pySoftLink): + if isinstance(obj1x, h5py.SoftLink) or isinstance(obj1x, lindi.LindiH5pySoftLink): + if isinstance(obj2x, h5py.SoftLink) or isinstance(obj2x, lindi.LindiH5pySoftLink): pass else: print(f"*************** Link type mismatch for {k}") print(type(obj1x)) print(type(obj2x)) - elif isinstance(obj1x, h5py.HardLink): - if isinstance(obj2x, lindi.LindiH5pyHardLink): + elif isinstance(obj1x, h5py.HardLink) or isinstance(obj1x, lindi.LindiH5pyHardLink): + if isinstance(obj2x, h5py.HardLink) or isinstance(obj2x, lindi.LindiH5pyHardLink): pass else: print(f"*************** Hard link type mismatch for {k}") print(type(obj1x)) print(type(obj2x)) - elif isinstance(obj2x, lindi.LindiH5pySoftLink): + elif isinstance(obj2x, h5py.SoftLink) or isinstance(obj2x, lindi.LindiH5pySoftLink): print(f"*************** Link type mismatch for {k}") print(type(obj1x)) print(type(obj2x)) - elif isinstance(obj2x, lindi.LindiH5pyHardLink): + elif isinstance(obj2x, h5py.HardLink) or isinstance(obj2x, lindi.LindiH5pyHardLink): print(f"*************** Link type mismatch for {k}") print(type(obj1x)) print(type(obj2x)) @@ -159,6 +148,11 @@ def _compare_h5py_datasets(d1: h5py.Dataset, d2: h5py.Dataset, label: str): print("*************** Ndim mismatch") if d1.maxshape != d2.maxshape: print("*************** Maxshape mismatch") + if d1.size and d1.size < 100: + if not _check_equal(d1[()], d2[()]): + print("*************** Data mismatch") + print(f" h5f1: {d1[()].ravel()[:5]}") + print(f" h5f2: {d2[()].ravel()[:5]}") def _download_file(url, fname): diff --git a/lindi/LindiH5ZarrStore/LindiH5ZarrStore.py b/lindi/LindiH5ZarrStore/LindiH5ZarrStore.py index 0c35ffa..40b2a48 100644 --- a/lindi/LindiH5ZarrStore/LindiH5ZarrStore.py +++ b/lindi/LindiH5ZarrStore/LindiH5ZarrStore.py @@ -389,7 +389,7 @@ def listdir(self, path: str = "") -> List[str]: if self._h5f is None: raise Exception("Store is closed") try: - item = self._h5f[path] + item = self._h5f['/' + path] except KeyError: return [] if isinstance(item, h5py.Group): diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py index 88ac619..97dc0a0 100644 --- a/lindi/LindiH5pyFile/LindiH5pyDataset.py +++ b/lindi/LindiH5pyFile/LindiH5pyDataset.py @@ -1,7 +1,10 @@ from typing import TYPE_CHECKING, Union import h5py + from .LindiH5pyAttributes import LindiH5pyAttributes +from .LindiH5pyReference import LindiH5pyReference from ..LindiZarrWrapper import LindiZarrWrapperDataset +from ..LindiZarrWrapper import LindiZarrWrapperReference if TYPE_CHECKING: @@ -61,4 +64,9 @@ def attrs(self): # type: ignore return LindiH5pyAttributes(self._dataset_object.attrs) def __getitem__(self, args, new_dtype=None): - return self._dataset_object.__getitem__(args, new_dtype) + ret = self._dataset_object.__getitem__(args, new_dtype) + if isinstance(self._dataset_object, LindiZarrWrapperDataset): + if isinstance(ret, dict): + if '_REFERENCE' in ret: + ret = LindiH5pyReference(LindiZarrWrapperReference(ret['_REFERENCE'])) + return ret diff --git a/lindi/LindiH5pyFile/LindiH5pyFile.py b/lindi/LindiH5pyFile/LindiH5pyFile.py index 0259b2a..d1e8bf7 100644 --- a/lindi/LindiH5pyFile/LindiH5pyFile.py +++ b/lindi/LindiH5pyFile/LindiH5pyFile.py @@ -1,9 +1,12 @@ from typing import Union import h5py import zarr + from .LindiH5pyGroup import LindiH5pyGroup -from ..LindiZarrWrapper import LindiZarrWrapper +from .LindiH5pyDataset import LindiH5pyDataset +from ..LindiZarrWrapper import LindiZarrWrapper, LindiZarrWrapperGroup, LindiZarrWrapperDataset from .LindiH5pyAttributes import LindiH5pyAttributes +from .LindiH5pyReference import LindiH5pyReference class LindiH5pyFile(h5py.File): @@ -96,6 +99,24 @@ def __repr__(self): # Group methods def __getitem__(self, name): + if isinstance(name, LindiH5pyReference): + assert isinstance(self._file_object, LindiZarrWrapper) + x = self._file_object[name._reference] + if isinstance(x, LindiZarrWrapperGroup): + return LindiH5pyGroup(x, self) + elif isinstance(x, LindiZarrWrapperDataset): + return LindiH5pyDataset(x, self) + else: + raise Exception(f"Unexpected type for resolved reference at path {name}: {type(x)}") + elif isinstance(name, h5py.Reference): + assert isinstance(self._file_object, h5py.File) + x = self._file_object[name] + if isinstance(x, h5py.Group): + return LindiH5pyGroup(x, self) + elif isinstance(x, h5py.Dataset): + return LindiH5pyDataset(x, self) + else: + raise Exception(f"Unexpected type for resolved reference at path {name}: {type(x)}") return self._the_group[name] def get(self, name, default=None, getclass=False, getlink=False): diff --git a/lindi/LindiH5pyFile/LindiH5pyGroup.py b/lindi/LindiH5pyFile/LindiH5pyGroup.py index b85fd71..7926bfc 100644 --- a/lindi/LindiH5pyFile/LindiH5pyGroup.py +++ b/lindi/LindiH5pyFile/LindiH5pyGroup.py @@ -6,7 +6,6 @@ from .LindiH5pyLink import LindiH5pyHardLink, LindiH5pySoftLink from ..LindiZarrWrapper import LindiZarrWrapperGroup from .LindiH5pyAttributes import LindiH5pyAttributes -from .LindiH5pyReference import LindiH5pyReference if TYPE_CHECKING: @@ -25,7 +24,7 @@ def __init__(self, _group_object: Union[h5py.Group, LindiZarrWrapperGroup], _fil def __getitem__(self, name): if isinstance(self._group_object, h5py.Group): - if isinstance(name, h5py.h5r.Reference) or isinstance(name, (bytes, str)): + if isinstance(name, (bytes, str)): x = self._group_object[name] else: raise TypeError( @@ -39,10 +38,7 @@ def __getitem__(self, name): else: raise Exception(f"Unknown type: {type(x)}") elif isinstance(self._group_object, LindiZarrWrapperGroup): - if isinstance(name, LindiH5pyReference): - # is this the right thing to do? - x = self._group_object.file[name._reference] - elif isinstance(name, (bytes, str)): + if isinstance(name, (bytes, str)): x = self._group_object[name] else: raise TypeError( diff --git a/tests/test_with_real_data.py b/tests/test_with_real_data.py index 17d0401..4beeb5a 100644 --- a/tests/test_with_real_data.py +++ b/tests/test_with_real_data.py @@ -300,3 +300,7 @@ def test_with_real_data(): root = zarr.open(store, mode="r") _hdf5_visit_items(h5f, lambda key, item: _compare_item_2(item, root[key])) + + +if __name__ == "__main__": + test_with_real_data()