Skip to content

Commit

Permalink
Rewrite IO operations for H5: size-variant attributes such as tile coordinates are no longer attached; they can be accessed on demand. #10
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Milk committed Dec 16, 2023
1 parent ceeeda4 commit 4260a8d
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 171 deletions.
259 changes: 145 additions & 114 deletions lazyslide/h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class H5File:
"""A class that handle storage and reading of h5 file"""
"""A class that handles storage and reading of h5 file"""

COORDS_KEY = "coords"
MASKS_KEY = "masks"
Expand All @@ -23,137 +23,168 @@ def __init__(self, h5_file):
with h5py.File(h5_file, "w"):
pass
self.file = h5_file
self.coords = None
self.tile_ops = None
self.masks = {}
self.masks_level = {}
self.contours = []
self.holes = []
self.load()

self._rewrite = False
self._rewrite_mask = False
self._rewrite_contours = False

def set_coords(self, coords):
# Delete the previous exist one
self.coords = coords
self._rewrite = True
# Chunk by row-major order
self._save_dataset(self.COORDS_KEY, coords, chunks=True)

def load(self):
def get_coords(self):
return self._load_dataset(self.COORDS_KEY)

def get_one_coord_by_index(self, index):
return self._load_dataset_by_slice(self.COORDS_KEY, index)

def _load_dataset(self, key: str, group=None):
with h5py.File(self.file, "r") as h5:
if self.COORDS_KEY in h5:
ds = h5[self.COORDS_KEY]
self.coords = ds[:]
if group is not None:
if group not in h5:
return None
h5 = h5[group]
if key in h5:
ds = h5[key]
return ds[:]
else:
return None

def _load_dataset_by_slice(self, key: str, s: slice, group=None):
    """Read the part of a dataset selected by ``s`` from the h5 file.

    The file is opened read-only. When ``group`` is given the lookup
    happens inside that group; otherwise it happens at the file root.

    Returns ``None`` if ``group`` is given but missing from the file, or
    if ``key`` is not present at the location searched.
    """
    with h5py.File(self.file, "r") as handle:
        node = handle
        if group is not None:
            if group not in handle:
                return None
            node = handle[group]
        if key not in node:
            return None
        return node[key][s]

def _load_attrs(self, key: str, group=None):
with h5py.File(self.file, "r") as h5:
if group is not None:
if group not in h5:
return None
h5 = h5[group]
if key in h5:
ds = h5[key]
h5_attrs = ds.attrs
attrs = {}
for key in h5_attrs.keys():
value = h5_attrs.get(key)
if isinstance(value, Empty):
value = None
attrs[key] = value
self.tile_ops = TileOps(**attrs)
return attrs
else:
return None

if self.MASKS_KEY in h5:
masks = {}
masks_level = {}
masks_group = h5[self.MASKS_KEY]
for mask_name in masks_group.keys():
ds = masks_group.get(mask_name)
masks[mask_name] = ds[:]
masks_level[mask_name] = ds.attrs["level"]
self.masks = masks
self.masks_level = masks_level

if self.CONTOURS_KEY in h5:
contours = []
holes = []

contours_group = h5[self.CONTOURS_KEY]
c_keys = contours_group.keys()
for n in range(len(c_keys)):
ds = contours_group.get(f"{self.CONTOURS_KEY}_{n}")
contours.append(ds[:])

holes_group = h5[self.HOLES_kEY]
h_keys = holes_group.keys()
for n in range(len(h_keys)):
ds = holes_group.get(f"{self.HOLES_kEY}_{n}")
holes.append(ds[:])
self.contours = contours
self.holes = holes
def _save_dataset(self, key: str, data: np.ndarray, group=None, **kwargs):
    """Write ``data`` as the dataset ``key``, replacing any existing one.

    Parameters:
        key: Name of the dataset to create.
        data: Array written as the dataset content.
        group: Optional group name; it is created if missing and the
            dataset is stored inside it. Without it the dataset is stored
            at the file root.
        **kwargs: Forwarded to ``create_dataset``. ``dtype`` defaults to
            ``np.uint32`` when not supplied.
    """
    # ``dtype`` used to be hard-coded next to ``**kwargs``, so a caller
    # passing its own ``dtype`` got "multiple values for keyword argument".
    # Treating it as a default keeps existing calls unchanged.
    kwargs.setdefault("dtype", np.uint32)
    with h5py.File(self.file, "r+") as h5:
        target = h5 if group is None else h5.require_group(group)
        # Remove the previous dataset first; create_dataset does not
        # overwrite an existing name.
        if key in target:
            del target[key]
        target.create_dataset(key, data=data, **kwargs)

def _save_attr(self, key: str, attrs: dict, group=None):
    """Attach ``attrs`` to the existing object named ``key``.

    Parameters:
        key: Name of the object that receives the attributes.
        attrs: Mapping of attribute name to value. ``None`` values are
            stored as ``Empty(dtype="f")``.
        group: Optional group name; it is created if missing and ``key``
            is looked up inside it.

    If ``key`` does not exist nothing is written and a warning is issued,
    so that dropped attributes do not go unnoticed.
    """
    import warnings

    with h5py.File(self.file, "r+") as h5:
        target = h5 if group is None else h5.require_group(group)
        if key not in target:
            location = f" in group {group!r}" if group is not None else ""
            warnings.warn(
                f"Attributes for {key!r} were not saved: "
                f"no such object{location}",
                stacklevel=2,
            )
            return
        ds = target[key]
        for k, v in attrs.items():
            ds.attrs[k] = Empty(dtype="f") if v is None else v

def get_coords(self):
return self.coords
def _has_dataset(self, key: str, group=None):
    """Report whether ``key`` exists in the h5 file.

    With ``group`` given, the check is made inside that group and the
    result is ``False`` when the group itself is missing.
    """
    with h5py.File(self.file, "r") as handle:
        if group is None:
            return key in handle
        if group not in handle:
            return False
        return key in handle[group]

def set_tile_ops(self, tile_ops: TileOps):
self.tile_ops = tile_ops
self._rewrite = True
if self._has_dataset(self.COORDS_KEY):
new_attrs = asdict(tile_ops)
for k, v in new_attrs.items():
if v is None:
new_attrs[k] = Empty(dtype="f")
self._save_attr(self.COORDS_KEY, new_attrs)
else:
raise ValueError("Please set coords first")

def get_tile_ops(self):
return self.tile_ops

def set_mask(self, name, mask, level):
self.masks[name] = mask
self.masks_level[name] = level
self._rewrite_mask = True

def get_masks(self):
return self.masks, self.masks_level
attrs = self._load_attrs(self.COORDS_KEY)
if attrs is None:
return None
return TileOps(**attrs)

def set_mask(self, name: str, mask: np.ndarray, level: int):
    """Store ``mask`` as ``name`` in the masks group and record its ``level``."""
    masks_group = self.MASKS_KEY
    self._save_dataset(name, mask, group=masks_group, chunks=False)
    level_attr = {"level": level}
    self._save_attr(name, level_attr, group=masks_group)

def get_masks(self, name) -> "tuple[np.ndarray | None, int | None]":
    """Return the mask stored as ``name`` together with its level.

    Parameters:
        name: Name of the mask inside the masks group.

    Returns:
        ``(mask, level)`` where ``level`` is the value of the ``"level"``
        attribute saved with the mask, or ``(None, None)`` when no mask
        with that name exists.
    """
    group = self.MASKS_KEY
    if not self._has_dataset(name, group=group):
        return None, None
    mask = self._load_dataset(name, group=group)
    level = self._load_attrs(name, group=group)["level"]
    return mask, level

def get_available_masks(self):
    """List the names stored in the masks group.

    Returns an empty list when the file has no masks group.
    """
    with h5py.File(self.file, "r") as handle:
        if self.MASKS_KEY not in handle:
            return []
        return list(handle[self.MASKS_KEY].keys())

def set_contours_holes(self, contours, holes):
self.contours = contours
self.holes = holes
self._rewrite_contours = True
for i, arr in enumerate(contours):
self._save_dataset(f"{self.CONTOURS_KEY}_{i}", arr, group=self.CONTOURS_KEY)
for i, arr in enumerate(holes):
self._save_dataset(f"{self.HOLES_kEY}_{i}", arr, group=self.HOLES_kEY)
self._save_attr(
self.CONTOURS_KEY, {"length": len(contours)}, group=self.CONTOURS_KEY
)
self._save_attr(self.HOLES_kEY, {"length": len(holes)}, group=self.HOLES_kEY)

def get_contours_holes(self):
return self.contours, self.holes

def save(self):
with h5py.File(self.file, "r+") as h5:
if self._rewrite:
# Delete the previous exist coords
if self.COORDS_KEY in h5:
del h5[self.COORDS_KEY]

ds = h5.create_dataset(
self.COORDS_KEY,
data=self.coords,
chunks=True,
dtype=np.uint32,
)
attrs = ds.attrs
for k, v in asdict(self.tile_ops).items():
if v is None:
v = Empty(dtype="f")
attrs[k] = v

if self._rewrite_mask:
# Delete the previous exist masks
if self.MASKS_KEY in h5:
del h5[self.MASKS_KEY]

masks_group = h5.create_group(self.MASKS_KEY)
for mask_name, mask_array in self.masks.items():
ds = masks_group.create_dataset(
mask_name, data=mask_array, chunks=True
)
attrs = ds.attrs
attrs["level"] = self.masks_level[mask_name]

if self._rewrite_contours:
if self.CONTOURS_KEY in h5:
del h5[self.CONTOURS_KEY]

if self.HOLES_kEY in h5:
del h5[self.HOLES_kEY]

contours_group = h5.create_group(self.CONTOURS_KEY)
for i, arr in enumerate(self.contours):
dataset_name = f"{self.CONTOURS_KEY}_{i}"
contours_group.create_dataset(dataset_name, data=arr)

holes_group = h5.create_group(self.HOLES_kEY)
for i, arr in enumerate(self.holes):
dataset_name = f"{self.HOLES_kEY}_{i}"
holes_group.create_dataset(dataset_name, data=arr)
contours = []
holes = []
n_contours = self._load_attrs(self.CONTOURS_KEY, group=self.CONTOURS_KEY)
n_holes = self._load_attrs(self.HOLES_kEY, group=self.HOLES_kEY)
if n_contours is None or n_holes is None:
return [], []
else:
n_contours = n_contours["length"]
n_holes = n_holes["length"]
for i in range(n_contours):
contours.append(
self._load_dataset(f"{self.CONTOURS_KEY}_{i}", group=self.CONTOURS_KEY)
)
for i in range(n_holes):
holes.append(
self._load_dataset(f"{self.HOLES_kEY}_{i}", group=self.HOLES_kEY)
)

return contours, holes

@property
def has_tiles(self):
    """Whether the coords key is present at the root of the h5 file."""
    coords_present = self._has_dataset(self.COORDS_KEY)
    return coords_present

@property
def has_masks(self):
    """Whether the masks key is present at the root of the h5 file."""
    masks_present = self._has_dataset(self.MASKS_KEY)
    return masks_present

@property
def has_contours_holes(self):
    """Whether both the contours key and the holes key are present in the h5 file."""
    # The holes lookup is only performed when the contours key was found.
    if not self._has_dataset(self.CONTOURS_KEY):
        return False
    return self._has_dataset(self.HOLES_kEY)
3 changes: 1 addition & 2 deletions lazyslide/loader/slides_balanced_loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import warnings
from copy import deepcopy
from collections import deque

import numpy as np
from torch.utils.data import Dataset, Sampler, DataLoader
Expand Down Expand Up @@ -112,7 +111,7 @@ def __getitem__(self, ix):
wsi = self.wsi_list[self.proxy_ix[slide_ix]]

# change here how to get the coordinate
top, left = wsi.tiles_coords[tile_ix]
top, left = wsi.get_tile_by_index(tile_ix)
tile_ops = wsi.tile_ops
img = wsi.get_patch(
int(left),
Expand Down
Loading

0 comments on commit 4260a8d

Please sign in to comment.