From 8b312a99d1d2795195d5c7a9db98bfaf8cf6e519 Mon Sep 17 00:00:00 2001 From: jluethi Date: Wed, 18 Jan 2023 16:09:05 +0100 Subject: [PATCH 1/5] [BROKEN] Initial refactor work to load wells with varying shapes --- ome_zarr/reader.py | 135 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 15 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 25312051..bc75bbb7 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -490,7 +490,13 @@ def get_pyramid_lazy(self, node: Node) -> None: LOGGER.info("plate_data: %s", self.plate_data) self.rows = self.plate_data.get("rows") self.columns = self.plate_data.get("columns") - self.first_field = "0" + # TODO: Check which acquisitions are present + # Acquisitions are already stored at the plate level, + # so easy to get an overview about! + # load_multi_acquisition = True + # if load_multi_acquisition: + # pass + self.row_names = [row["name"] for row in self.rows] self.col_names = [col["name"] for col in self.columns] @@ -500,23 +506,55 @@ def get_pyramid_lazy(self, node: Node) -> None: self.row_count = len(self.rows) self.column_count = len(self.columns) + # 1) Get the dimensions for each well => dict of well specs? + # Current setup: Just get 1 well, assume this is always fitting + # Make this general: Currently just the dimension for first image in the well + # But could be generalized: Either when many FOVs are loaded. Or for multiplexing + # And could be loaded from aggregated metadata instead of loaded from each well + + # Loop over well self.well_paths + well_specs = self.get_plate_well_specs(node) + print(well_specs) + # Get the numpy type for the first well + self.numpy_type = well_specs[self.well_paths[0]].numpy_type + # img_pyramid_shapes are for a single well + print("img_pyramid_shapes: %s", well_specs[self.well_paths[0]].img_pyramid_shapes) + well_spec = well_specs[self.well_paths[0]] + self.axes = well_spec.img_metadata["axes"] + + # TODO: Find a better way to calculate this + self.levels = len(well_spec.img_pyramid_shapes) + # 2) Create the pyramid: list of dask arrays at different resolutions + # Currently: get_stitched_grid creates this, shape is simple because all wells are the same + # Going forward: get_stitched_grid function becomes more complex + # Do we need to get the max well size and pad all the wells? Or how do we do the layout? + # Easiest with a max well size (max in x & y) + + pyramid = [] + for level in range(self.levels): + lazy_plate = self.get_stiched_plate(level, well_specs) + pyramid.append(lazy_plate) + + # Get the first well... - well_zarr = self.zarr.create(self.well_paths[0]) - well_node = Node(well_zarr, node) - well_spec: Optional[Well] = well_node.first(Well) - if well_spec is None: - raise Exception("Could not find first well") - self.numpy_type = well_spec.numpy_type + # For loading plates of different shapes: Start here! Not just first well + # well_zarr = self.zarr.create(self.well_paths[0]) + # well_node = Node(well_zarr, node) + # well_spec: Optional[Well] = well_node.first(Well) + # if well_spec is None: + # raise Exception("Could not find first well") + # self.numpy_type = well_spec.numpy_type - LOGGER.debug("img_pyramid_shapes: %s", well_spec.img_pyramid_shapes) + # LOGGER.debug("img_pyramid_shapes: %s", well_spec.img_pyramid_shapes) - self.axes = well_spec.img_metadata["axes"] + # self.axes = well_spec.img_metadata["axes"] # Create a dask pyramid for the plate - pyramid = [] - for level, tile_shape in enumerate(well_spec.img_pyramid_shapes): - lazy_plate = self.get_stitched_grid(level, tile_shape) - pyramid.append(lazy_plate) + # pyramid = [] + # for level, tile_shape in enumerate(well_spec.img_pyramid_shapes): + # lazy_plate = self.get_stitched_grid(level, tile_shape) + # pyramid.append(lazy_plate) + # Set the node.data to be pyramid view of the plate node.data = pyramid @@ -526,13 +564,78 @@ def get_pyramid_lazy(self, node: Node) -> None: # "metadata" dict gets added to each 'plate' layer in napari node.metadata.update({"metadata": {"plate": self.plate_data}}) + def get_stiched_plate(self, level: int, well_specs: Dict): + print(f"get_stiched_plate() level: {level}") + # New method to replace get_stitched_grid that can load a different + # shape for each well + def get_tile(tile_name: str) -> np.ndarray: + """tile_name is 'level,z,c,t,row,col'""" + path = self.get_new_tile_path(level, tile_name) + LOGGER.debug("LOADING tile... %s with shape: %s", path, tile_shape) + + try: + data = self.zarr.load(path) + except ValueError: + LOGGER.exception("Failed to load %s", path) + data = np.zeros(tile_shape, dtype=self.numpy_type) + return data + + lazy_reader = delayed(get_tile) + + lazy_rows = [] + # For level 0, return whole image for each tile + # We should not just try to load every row & column, but only the ones that are present + # Thus, loop over the dict + # BUT: How do we place them in the big array afterwards? => gets more complex + # Just concatenate all of them for now + for tile_name, well_spec in well_specs.items(): + print(f"Loading tile {tile_name}, level {level}") + tile_shape = well_spec.img_pyramid_shapes[level] + print(f'Tile shape: {tile_shape}') + lazy_tile = da.from_delayed( + lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type + ) + lazy_rows.append(lazy_tile) + return da.concatenate(lazy_rows, axis=len(self.axes) - 2) + + # for row in range(self.row_count): + # lazy_row: List[da.Array] = [] + # for col in range(self.column_count): + # tile_name = f"{row},{col}" + # # tile_shape = + # print(f"Loading tile {tile_name}, level {level}") + # lazy_tile = da.from_delayed( + # lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type + # ) + # lazy_row.append(lazy_tile) + # lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) + # print(lazy_rows) + # return da.concatenate(lazy_rows, axis=len(self.axes) - 2) + + + + def get_plate_well_specs(self, node) -> Dict: + well_specs = {} + for well_path in self.well_paths: + print(f'Loading Well spec for {well_path}') + well_zarr = self.zarr.create(well_path) + well_node = Node(well_zarr, node) + well_spec: Optional[Well] = well_node.first(Well) + well_specs[well_path] = well_spec + return well_specs + def get_numpy_type(self, image_node: Node) -> np.dtype: return image_node.data[0].dtype - def get_tile_path(self, level: int, row: int, col: int) -> str: + def get_new_tile_path(self, level: int, tile_name: str, image_index: int = 0) -> str: + return ( + f"{tile_name}/{image_index}/{level}" + ) + + def get_tile_path(self, level: int, row: int, col: int, image_index: int = 0) -> str: return ( f"{self.row_names[row]}/" - f"{self.col_names[col]}/{self.first_field}/{level}" + f"{self.col_names[col]}/{image_index}/{level}" ) def get_stitched_grid(self, level: int, tile_shape: tuple) -> da.core.Array: @@ -559,11 +662,13 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_row: List[da.Array] = [] for col in range(self.column_count): tile_name = f"{row},{col}" + print(f"Loading tile {tile_name}, level {level}") lazy_tile = da.from_delayed( lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type ) lazy_row.append(lazy_tile) lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) + print(lazy_rows) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) From 6d08f1de0f3ba3fe2f2ac839d209ce6b6ebd97d1 Mon Sep 17 00:00:00 2001 From: jluethi Date: Thu, 19 Jan 2023 09:38:48 +0100 Subject: [PATCH 2/5] Add padding of wells to max size --- ome_zarr/reader.py | 99 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 23 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index bc75bbb7..5ab5d082 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -499,6 +499,8 @@ def get_pyramid_lazy(self, node: Node) -> None: self.row_names = [row["name"] for row in self.rows] self.col_names = [col["name"] for col in self.columns] + print(self.row_names) + print(self.col_names) self.well_paths = [well["path"] for well in self.plate_data.get("wells")] self.well_paths.sort() @@ -580,14 +582,78 @@ def get_tile(tile_name: str) -> np.ndarray: data = np.zeros(tile_shape, dtype=self.numpy_type) return data + def get_max_well_size(well_specs, padding: int = 5): + """ + Calculates the max size of any of the wells + + :param well_specs: Dict of well_spec (Well Node) + :param padding: xy padding to be added between wells in x & y + + """ + # FIXME: Figure out the real downsampling factor + downsampling_factor = 2 + # FIXME: Get max pyramid level + max_level = 4 + # max_well_dims = list(list(well_specs.values())[0].img_pyramid_shapes[level]) + # for well_spec in well_specs.values(): + # new_dims = well_spec.img_pyramid_shapes[level] + # for dim in range(len(max_well_dims)): + # if new_dims[dim] > max_well_dims[dim]: + # max_well_dims[dim] = new_dims[dim] + # return max_well_dims + for well_spec in well_specs.values(): + new_dims = well_spec.img_pyramid_shapes[level] + for dim in range(len(max_well_dims) - 2): + if new_dims[dim] > max_well_dims[dim]: + max_well_dims[dim] = new_dims[dim] + for dim in range(len(max_well_dims) - 2, len(max_well_dims)): + real_padding = padding * downsampling_factor ** -(level - max_level) + if new_dims[dim] + real_padding > max_well_dims[dim]: + max_well_dims[dim] = new_dims[dim] + real_padding + return max_well_dims + + def calculate_required_padding(max_well_dims, tile_shape): + # Calculate the required padding by dimension + diff_size = [] + for i in range(len(max_well_dims)): + diff_size.append(max_well_dims[i] - tile_shape[i]) + + # Decide which side gets padded + # Logic: + # 1. Pad x & y equally on both sides + # 2. Pad z, c, t on right side (keep aligned at the same 0) + # Limitations: + # 1. Does not take into account transformations + # 2. FIXME: Padding of channels is not optimal, could make a + # channel appear as something that its not in the viewer + padding = [] + for i in range(len(max_well_dims)-2): + padding.append((0, diff_size[i])) + + for i in range(len(max_well_dims)-2, len(max_well_dims)): + padding.append((int(diff_size[i]/2), round(diff_size[i]/2 + 0.1))) + + return tuple(padding) + + max_well_dims = get_max_well_size(well_specs) + print(f'Max well dims: {max_well_dims}') + lazy_reader = delayed(get_tile) lazy_rows = [] - # For level 0, return whole image for each tile - # We should not just try to load every row & column, but only the ones that are present - # Thus, loop over the dict - # BUT: How do we place them in the big array afterwards? => gets more complex - # Just concatenate all of them for now + # TODO: Figure out how to recreate a plate layout from this and lazily build it + + # TODO: Fill "empty" columns/rows as well? i.e. only loop over existing rows & cols + # or also whole columns / rows those that don't exist? + # [Check spec: Is it always letters for rows and int for columns] + + # TODO: Handle different Z levels (I actually have this in the 23 well data) => where to pad? + + # TODO: Handle the case where different wells have a different number of channels? + # i.e. how do we handle the case where one well has fewer channels than another + # Problem: Hard to know what would need to be padded! May put the channel in the wrong place... + # => initial scope only xyz? + for tile_name, well_spec in well_specs.items(): print(f"Loading tile {tile_name}, level {level}") tile_shape = well_spec.img_pyramid_shapes[level] @@ -595,24 +661,11 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_tile = da.from_delayed( lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type ) - lazy_rows.append(lazy_tile) - return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - - # for row in range(self.row_count): - # lazy_row: List[da.Array] = [] - # for col in range(self.column_count): - # tile_name = f"{row},{col}" - # # tile_shape = - # print(f"Loading tile {tile_name}, level {level}") - # lazy_tile = da.from_delayed( - # lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type - # ) - # lazy_row.append(lazy_tile) - # lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) - # print(lazy_rows) - # return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - - + # Calculate required padding + padding = calculate_required_padding(max_well_dims, tile_shape) + padded_lazy_tile = da.pad(lazy_tile, pad_width = padding, mode = 'constant', constant_values = 0) + lazy_rows.append(padded_lazy_tile) + return da.concatenate(lazy_rows, axis=len(self.axes) - 1) def get_plate_well_specs(self, node) -> Dict: well_specs = {} From e9cc3a4f5101508707726e88286e71b497d8685e Mon Sep 17 00:00:00 2001 From: jluethi Date: Thu, 19 Jan 2023 09:59:23 +0100 Subject: [PATCH 3/5] Reestablish plate layout --- ome_zarr/reader.py | 66 ++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 5ab5d082..1852b7f1 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -582,19 +582,19 @@ def get_tile(tile_name: str) -> np.ndarray: data = np.zeros(tile_shape, dtype=self.numpy_type) return data - def get_max_well_size(well_specs, padding: int = 5): + def get_max_well_size(well_specs, padding: int = 10): """ Calculates the max size of any of the wells :param well_specs: Dict of well_spec (Well Node) - :param padding: xy padding to be added between wells in x & y + :param padding: xy padding to be added between wells """ # FIXME: Figure out the real downsampling factor downsampling_factor = 2 # FIXME: Get max pyramid level max_level = 4 - # max_well_dims = list(list(well_specs.values())[0].img_pyramid_shapes[level]) + max_well_dims = list(list(well_specs.values())[0].img_pyramid_shapes[level]) # for well_spec in well_specs.values(): # new_dims = well_spec.img_pyramid_shapes[level] # for dim in range(len(max_well_dims)): @@ -640,32 +640,41 @@ def calculate_required_padding(max_well_dims, tile_shape): lazy_reader = delayed(get_tile) + # TODO: Test different Z levels + + # TODO: Test different channels lazy_rows = [] - # TODO: Figure out how to recreate a plate layout from this and lazily build it - - # TODO: Fill "empty" columns/rows as well? i.e. only loop over existing rows & cols - # or also whole columns / rows those that don't exist? - # [Check spec: Is it always letters for rows and int for columns] - - # TODO: Handle different Z levels (I actually have this in the 23 well data) => where to pad? - - # TODO: Handle the case where different wells have a different number of channels? - # i.e. how do we handle the case where one well has fewer channels than another - # Problem: Hard to know what would need to be padded! May put the channel in the wrong place... - # => initial scope only xyz? - - for tile_name, well_spec in well_specs.items(): - print(f"Loading tile {tile_name}, level {level}") - tile_shape = well_spec.img_pyramid_shapes[level] - print(f'Tile shape: {tile_shape}') - lazy_tile = da.from_delayed( - lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type - ) - # Calculate required padding - padding = calculate_required_padding(max_well_dims, tile_shape) - padded_lazy_tile = da.pad(lazy_tile, pad_width = padding, mode = 'constant', constant_values = 0) - lazy_rows.append(padded_lazy_tile) - return da.concatenate(lazy_rows, axis=len(self.axes) - 1) + for row_name in self.row_names: + lazy_row: List[da.Array] = [] + for col_name in self.col_names: + tile_name = f"{row_name}/{col_name}" + if tile_name in well_specs: + tile_shape = well_specs[tile_name].img_pyramid_shapes[level] + lazy_tile = da.from_delayed( + lazy_reader(tile_name), + shape=tile_shape, + dtype=self.numpy_type + ) + padding = calculate_required_padding( + max_well_dims, + tile_shape + ) + padded_lazy_tile = da.pad( + lazy_tile, + pad_width = padding, + mode = 'constant', + constant_values = 0 + ) + else: + # If a well does not exist on disk, + # just get an array of 0s of the fitting size + padded_lazy_tile = da.zeros( + max_well_dims, + dtype=self.numpy_type + ) + lazy_row.append(padded_lazy_tile) + lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) + return da.concatenate(lazy_rows, axis=len(self.axes) - 2) def get_plate_well_specs(self, node) -> Dict: well_specs = {} @@ -721,7 +730,6 @@ def get_tile(tile_name: str) -> np.ndarray: ) lazy_row.append(lazy_tile) lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) - print(lazy_rows) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) From eba8622f1d992a7413cd83a727c052edb017a486 Mon Sep 17 00:00:00 2001 From: jluethi Date: Thu, 19 Jan 2023 11:11:37 +0100 Subject: [PATCH 4/5] Cleanup and add back old code path as optional way --- ome_zarr/reader.py | 147 ++++++++++++++++++++------------------------- 1 file changed, 66 insertions(+), 81 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 1852b7f1..08673992 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -481,7 +481,7 @@ def __init__(self, node: Node) -> None: LOGGER.debug("Plate created with ZarrLocation fmt: %s", self.zarr.fmt) self.get_pyramid_lazy(node) - def get_pyramid_lazy(self, node: Node) -> None: + def get_pyramid_lazy(self, node: Node, loading: str = "new") -> None: """ Return a pyramid of dask data, where the highest resolution is the stitched full-resolution images. @@ -490,73 +490,61 @@ def get_pyramid_lazy(self, node: Node) -> None: LOGGER.info("plate_data: %s", self.plate_data) self.rows = self.plate_data.get("rows") self.columns = self.plate_data.get("columns") - # TODO: Check which acquisitions are present - # Acquisitions are already stored at the plate level, - # so easy to get an overview about! - # load_multi_acquisition = True - # if load_multi_acquisition: - # pass self.row_names = [row["name"] for row in self.rows] self.col_names = [col["name"] for col in self.columns] - print(self.row_names) - print(self.col_names) - self.well_paths = [well["path"] for well in self.plate_data.get("wells")] + self.well_paths = [well["path"] for well in + self.plate_data.get("wells")] self.well_paths.sort() self.row_count = len(self.rows) self.column_count = len(self.columns) - # 1) Get the dimensions for each well => dict of well specs? - # Current setup: Just get 1 well, assume this is always fitting - # Make this general: Currently just the dimension for first image in the well - # But could be generalized: Either when many FOVs are loaded. Or for multiplexing - # And could be loaded from aggregated metadata instead of loaded from each well - - # Loop over well self.well_paths - well_specs = self.get_plate_well_specs(node) - print(well_specs) - # Get the numpy type for the first well - self.numpy_type = well_specs[self.well_paths[0]].numpy_type - # img_pyramid_shapes are for a single well - print("img_pyramid_shapes: %s", well_specs[self.well_paths[0]].img_pyramid_shapes) - well_spec = well_specs[self.well_paths[0]] - self.axes = well_spec.img_metadata["axes"] - - # TODO: Find a better way to calculate this - self.levels = len(well_spec.img_pyramid_shapes) - # 2) Create the pyramid: list of dask arrays at different resolutions - # Currently: get_stitched_grid creates this, shape is simple because all wells are the same - # Going forward: get_stitched_grid function becomes more complex - # Do we need to get the max well size and pad all the wells? Or how do we do the layout? - # Easiest with a max well size (max in x & y) - - pyramid = [] - for level in range(self.levels): - lazy_plate = self.get_stiched_plate(level, well_specs) - pyramid.append(lazy_plate) - - - # Get the first well... - # For loading plates of different shapes: Start here! Not just first well - # well_zarr = self.zarr.create(self.well_paths[0]) - # well_node = Node(well_zarr, node) - # well_spec: Optional[Well] = well_node.first(Well) - # if well_spec is None: - # raise Exception("Could not find first well") - # self.numpy_type = well_spec.numpy_type - - # LOGGER.debug("img_pyramid_shapes: %s", well_spec.img_pyramid_shapes) - - # self.axes = well_spec.img_metadata["axes"] - - # Create a dask pyramid for the plate - # pyramid = [] - # for level, tile_shape in enumerate(well_spec.img_pyramid_shapes): - # lazy_plate = self.get_stitched_grid(level, tile_shape) - # pyramid.append(lazy_plate) + # Default loading path + if loading == "default": + self.first_field = "0" + # Get the first well... + well_zarr = self.zarr.create(self.well_paths[0]) + well_node = Node(well_zarr, node) + well_spec: Optional[Well] = well_node.first(Well) + if well_spec is None: + raise Exception("Could not find first well") + self.numpy_type = well_spec.numpy_type + + LOGGER.debug("img_pyramid_shapes: %s", well_spec.img_pyramid_shapes) + + self.axes = well_spec.img_metadata["axes"] + # Create a dask pyramid for the plate + pyramid = [] + for level, tile_shape in enumerate(well_spec.img_pyramid_shapes): + lazy_plate = self.get_stitched_grid(level, tile_shape) + pyramid.append(lazy_plate) + + # New loading path that handles wells with different shapes + elif loading == "new": + # Get all the well specs + # TODO: Find a way to speed up this loading, i.e. discussion here: + # https://github.com/ome/ngff/issues/141 + well_specs = self.get_plate_well_specs(node) + + # Assumption: The following information is consistent across wells + # Get the numpy type for the first well + well_spec = well_specs[self.well_paths[0]] + self.numpy_type = well_spec.numpy_type + LOGGER.debug("img_pyramid_shapes: %s", well_spec.img_pyramid_shapes) + self.axes = well_spec.img_metadata["axes"] + self.levels = len(well_spec.img_pyramid_shapes) + # FIXME: Figure out the real downsampling factor + self.downsampling_factor = 2 + pyramid = [] + for level in range(self.levels): + lazy_plate = self.get_stiched_plate(level, well_specs) + pyramid.append(lazy_plate) + + else: + raise Exception('No valid loading path specified') # Set the node.data to be pyramid view of the plate node.data = pyramid @@ -567,7 +555,7 @@ def get_pyramid_lazy(self, node: Node) -> None: node.metadata.update({"metadata": {"plate": self.plate_data}}) def get_stiched_plate(self, level: int, well_specs: Dict): - print(f"get_stiched_plate() level: {level}") + LOGGER.debug(f"get_stiched_plate() level: {level}") # New method to replace get_stitched_grid that can load a different # shape for each well def get_tile(tile_name: str) -> np.ndarray: @@ -578,6 +566,9 @@ def get_tile(tile_name: str) -> np.ndarray: try: data = self.zarr.load(path) except ValueError: + # With the new loading scheme, I don't think we hit this + # part anymore, but maybe there are exceptions where it still + # occurs? LOGGER.exception("Failed to load %s", path) data = np.zeros(tile_shape, dtype=self.numpy_type) return data @@ -590,24 +581,16 @@ def get_max_well_size(well_specs, padding: int = 10): :param padding: xy padding to be added between wells """ - # FIXME: Figure out the real downsampling factor - downsampling_factor = 2 - # FIXME: Get max pyramid level - max_level = 4 - max_well_dims = list(list(well_specs.values())[0].img_pyramid_shapes[level]) - # for well_spec in well_specs.values(): - # new_dims = well_spec.img_pyramid_shapes[level] - # for dim in range(len(max_well_dims)): - # if new_dims[dim] > max_well_dims[dim]: - # max_well_dims[dim] = new_dims[dim] - # return max_well_dims + max_well_dims = list(list(well_specs.values())[0] \ + .img_pyramid_shapes[level]) for well_spec in well_specs.values(): new_dims = well_spec.img_pyramid_shapes[level] for dim in range(len(max_well_dims) - 2): if new_dims[dim] > max_well_dims[dim]: max_well_dims[dim] = new_dims[dim] for dim in range(len(max_well_dims) - 2, len(max_well_dims)): - real_padding = padding * downsampling_factor ** -(level - max_level) + real_padding = padding * \ + self.downsampling_factor ** -(level - self.levels) if new_dims[dim] + real_padding > max_well_dims[dim]: max_well_dims[dim] = new_dims[dim] + real_padding return max_well_dims @@ -631,18 +614,15 @@ def calculate_required_padding(max_well_dims, tile_shape): padding.append((0, diff_size[i])) for i in range(len(max_well_dims)-2, len(max_well_dims)): - padding.append((int(diff_size[i]/2), round(diff_size[i]/2 + 0.1))) + padding.append((int(diff_size[i]/2), + round(diff_size[i]/2 + 0.1))) return tuple(padding) max_well_dims = get_max_well_size(well_specs) - print(f'Max well dims: {max_well_dims}') lazy_reader = delayed(get_tile) - # TODO: Test different Z levels - - # TODO: Test different channels lazy_rows = [] for row_name in self.row_names: lazy_row: List[da.Array] = [] @@ -679,7 +659,7 @@ def calculate_required_padding(max_well_dims, tile_shape): def get_plate_well_specs(self, node) -> Dict: well_specs = {} for well_path in self.well_paths: - print(f'Loading Well spec for {well_path}') + LOGGER.info(f'Loading Well spec for {well_path}') well_zarr = self.zarr.create(well_path) well_node = Node(well_zarr, node) well_spec: Optional[Well] = well_node.first(Well) @@ -689,15 +669,20 @@ def get_plate_well_specs(self, node) -> Dict: def get_numpy_type(self, image_node: Node) -> np.dtype: return image_node.data[0].dtype - def get_new_tile_path(self, level: int, tile_name: str, image_index: int = 0) -> str: + def get_new_tile_path( + self, + level: int, + tile_name: str, + image_index: int = 0 + ) -> str: return ( f"{tile_name}/{image_index}/{level}" ) - def get_tile_path(self, level: int, row: int, col: int, image_index: int = 0) -> str: + def get_tile_path(self, level: int, row: int, col: int) -> str: return ( f"{self.row_names[row]}/" - f"{self.col_names[col]}/{image_index}/{level}" + f"{self.col_names[col]}/{self.first_field}/{level}" ) def get_stitched_grid(self, level: int, tile_shape: tuple) -> da.core.Array: @@ -724,7 +709,6 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_row: List[da.Array] = [] for col in range(self.column_count): tile_name = f"{row},{col}" - print(f"Loading tile {tile_name}, level {level}") lazy_tile = da.from_delayed( lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type ) @@ -733,6 +717,7 @@ def get_tile(tile_name: str) -> np.ndarray: return da.concatenate(lazy_rows, axis=len(self.axes) - 2) + class PlateLabels(Plate): def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover """251.zarr/A/1/0/labels/0/3/""" From 20c35e6a0e4600b57181be55c44f08f42ac83078 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Jan 2023 10:42:43 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ome_zarr/reader.py | 75 +++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 47 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 08673992..7e005715 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -494,8 +494,7 @@ def get_pyramid_lazy(self, node: Node, loading: str = "new") -> None: self.row_names = [row["name"] for row in self.rows] self.col_names = [col["name"] for col in self.columns] - self.well_paths = [well["path"] for well in - self.plate_data.get("wells")] + self.well_paths = [well["path"] for well in self.plate_data.get("wells")] self.well_paths.sort() self.row_count = len(self.rows) @@ -527,7 +526,7 @@ def get_pyramid_lazy(self, node: Node, loading: str = "new") -> None: # TODO: Find a way to speed up this loading, i.e. discussion here: # https://github.com/ome/ngff/issues/141 well_specs = self.get_plate_well_specs(node) - + # Assumption: The following information is consistent across wells # Get the numpy type for the first well well_spec = well_specs[self.well_paths[0]] @@ -542,9 +541,9 @@ def get_pyramid_lazy(self, node: Node, loading: str = "new") -> None: for level in range(self.levels): lazy_plate = self.get_stiched_plate(level, well_specs) pyramid.append(lazy_plate) - + else: - raise Exception('No valid loading path specified') + raise Exception("No valid loading path specified") # Set the node.data to be pyramid view of the plate node.data = pyramid @@ -556,7 +555,7 @@ def get_pyramid_lazy(self, node: Node, loading: str = "new") -> None: def get_stiched_plate(self, level: int, well_specs: Dict): LOGGER.debug(f"get_stiched_plate() level: {level}") - # New method to replace get_stitched_grid that can load a different + # New method to replace get_stitched_grid that can load a different # shape for each well def get_tile(tile_name: str) -> np.ndarray: """tile_name is 'level,z,c,t,row,col'""" @@ -566,8 +565,8 @@ def get_tile(tile_name: str) -> np.ndarray: try: data = self.zarr.load(path) except ValueError: - # With the new loading scheme, I don't think we hit this - # part anymore, but maybe there are exceptions where it still + # With the new loading scheme, I don't think we hit this + # part anymore, but maybe there are exceptions where it still # occurs? LOGGER.exception("Failed to load %s", path) data = np.zeros(tile_shape, dtype=self.numpy_type) @@ -581,16 +580,16 @@ def get_max_well_size(well_specs, padding: int = 10): :param padding: xy padding to be added between wells """ - max_well_dims = list(list(well_specs.values())[0] \ - .img_pyramid_shapes[level]) + max_well_dims = list(list(well_specs.values())[0].img_pyramid_shapes[level]) for well_spec in well_specs.values(): new_dims = well_spec.img_pyramid_shapes[level] for dim in range(len(max_well_dims) - 2): if new_dims[dim] > max_well_dims[dim]: max_well_dims[dim] = new_dims[dim] for dim in range(len(max_well_dims) - 2, len(max_well_dims)): - real_padding = padding * \ - self.downsampling_factor ** -(level - self.levels) + real_padding = padding * self.downsampling_factor ** -( + level - self.levels + ) if new_dims[dim] + real_padding > max_well_dims[dim]: max_well_dims[dim] = new_dims[dim] + real_padding return max_well_dims @@ -600,23 +599,22 @@ def calculate_required_padding(max_well_dims, tile_shape): diff_size = [] for i in range(len(max_well_dims)): diff_size.append(max_well_dims[i] - tile_shape[i]) - + # Decide which side gets padded - # Logic: + # Logic: # 1. Pad x & y equally on both sides # 2. Pad z, c, t on right side (keep aligned at the same 0) # Limitations: # 1. Does not take into account transformations - # 2. FIXME: Padding of channels is not optimal, could make a + # 2. FIXME: Padding of channels is not optimal, could make a # channel appear as something that its not in the viewer padding = [] - for i in range(len(max_well_dims)-2): + for i in range(len(max_well_dims) - 2): padding.append((0, diff_size[i])) - - for i in range(len(max_well_dims)-2, len(max_well_dims)): - padding.append((int(diff_size[i]/2), - round(diff_size[i]/2 + 0.1))) - + + for i in range(len(max_well_dims) - 2, len(max_well_dims)): + padding.append((int(diff_size[i] / 2), round(diff_size[i] / 2 + 0.1))) + return tuple(padding) max_well_dims = get_max_well_size(well_specs) @@ -631,35 +629,24 @@ def calculate_required_padding(max_well_dims, tile_shape): if tile_name in well_specs: tile_shape = well_specs[tile_name].img_pyramid_shapes[level] lazy_tile = da.from_delayed( - lazy_reader(tile_name), - shape=tile_shape, - dtype=self.numpy_type - ) - padding = calculate_required_padding( - max_well_dims, - tile_shape + lazy_reader(tile_name), shape=tile_shape, dtype=self.numpy_type ) + padding = calculate_required_padding(max_well_dims, tile_shape) padded_lazy_tile = da.pad( - lazy_tile, - pad_width = padding, - mode = 'constant', - constant_values = 0 + lazy_tile, pad_width=padding, mode="constant", constant_values=0 ) else: - # If a well does not exist on disk, + # If a well does not exist on disk, # just get an array of 0s of the fitting size - padded_lazy_tile = da.zeros( - max_well_dims, - dtype=self.numpy_type - ) + padded_lazy_tile = da.zeros(max_well_dims, dtype=self.numpy_type) lazy_row.append(padded_lazy_tile) - lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) + lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) def get_plate_well_specs(self, node) -> Dict: well_specs = {} for well_path in self.well_paths: - LOGGER.info(f'Loading Well spec for {well_path}') + LOGGER.info(f"Loading Well spec for {well_path}") well_zarr = self.zarr.create(well_path) well_node = Node(well_zarr, node) well_spec: Optional[Well] = well_node.first(Well) @@ -670,14 +657,9 @@ def get_numpy_type(self, image_node: Node) -> np.dtype: return image_node.data[0].dtype def get_new_tile_path( - self, - level: int, - tile_name: str, - image_index: int = 0 + self, level: int, tile_name: str, image_index: int = 0 ) -> str: - return ( - f"{tile_name}/{image_index}/{level}" - ) + return f"{tile_name}/{image_index}/{level}" def get_tile_path(self, level: int, row: int, col: int) -> str: return ( @@ -717,7 +699,6 @@ def get_tile(tile_name: str) -> np.ndarray: return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - class PlateLabels(Plate): def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover """251.zarr/A/1/0/labels/0/3/"""