From 076b959c2bbba89f457f1468c7d458c4d54a9b7f Mon Sep 17 00:00:00 2001 From: CielAl Date: Thu, 25 Apr 2024 09:19:08 -0400 Subject: [PATCH 1/6] Deploy OpenSlide handle abstraction to BaseImage and all *Module.py. Make CuImage Handle prototype ready for future development. Massive code cleaning --- .gitignore | 6 + histoqc/AnnotationModule.py | 11 +- histoqc/BaseImage.py | 259 +++++++--------- histoqc/BasicModule.py | 42 +-- histoqc/BlurDetectionModule.py | 25 +- histoqc/BrightContrastModule.py | 27 +- histoqc/BubbleRegionByRegion.py | 109 ++++--- histoqc/ClassificationModule.py | 40 +-- histoqc/DeconvolutionModule.py | 26 +- histoqc/HistogramModule.py | 22 +- histoqc/LightDarkModule.py | 47 ++- histoqc/LocalTextureEstimationModule.py | 18 +- histoqc/MorphologyModule.py | 23 +- histoqc/SaveModule.py | 54 ++-- histoqc/TileExtractionModule.py | 18 +- histoqc/__main__.py | 7 +- histoqc/_pipeline.py | 7 +- histoqc/_worker.py | 10 +- histoqc/annotations/annot_collection.py | 2 +- histoqc/annotations/annotation/base.py | 3 +- histoqc/annotations/annotation/geojson.py | 2 +- histoqc/config/__main__.py | 2 +- histoqc/import_wrapper/__init__.py | 1 + histoqc/import_wrapper/helper.py | 89 +++++- histoqc/import_wrapper/openslide.py | 7 + histoqc/import_wrapper/typing.py | 6 - histoqc/tests/test_pipeline_cli.py | 4 +- histoqc/tests/test_ui_cli.py | 2 - histoqc/wsi_handles/base.py | 340 ++++++++++++++++++++++ histoqc/wsi_handles/constants.py | 20 ++ histoqc/wsi_handles/cuimage_handle.py | 171 +++++++++++ histoqc/wsi_handles/openslide_handle.py | 145 +++++++++ histoqc/wsi_handles/utils.py | 38 +++ pyproject.toml | 4 + setup.py | 4 + 35 files changed, 1155 insertions(+), 436 deletions(-) delete mode 100644 histoqc/import_wrapper/typing.py create mode 100644 histoqc/wsi_handles/base.py create mode 100644 histoqc/wsi_handles/constants.py create mode 100644 histoqc/wsi_handles/cuimage_handle.py create mode 100644 histoqc/wsi_handles/openslide_handle.py create mode 100644 histoqc/wsi_handles/utils.py diff --git a/.gitignore b/.gitignore index 495397d..e9a2dde 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ wheels/ *.egg-info/ *.egg *.whl +*.log .coverage* MANIFEST histoqc/tests/data/* @@ -24,4 +25,9 @@ htmlcov/ histoqc/_version.py environment.yml +*.svs +/histoqc/tests/data/ +.histoqc_pkg_data* +.fuse_hidden* +.pytest* diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py index 323fb1f..67414f6 100644 --- a/histoqc/AnnotationModule.py +++ b/histoqc/AnnotationModule.py @@ -1,7 +1,8 @@ import logging from typing import List, Tuple -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io +from skimage.util import img_as_ubyte import os from pathlib import PurePosixPath, Path from shapely.geometry import Polygon @@ -47,13 +48,13 @@ def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollectio return np.array(mask) -def getParams(s, params): +def getParams(s: BaseImage, params): # read params - format: xml, json; file_path; suffix; ann_format = params.get("format", None) file_path = params.get("file_path", None) suffix = params.get("suffix", "") - # try use default value if the params are not provided + # try using default value if the params are not provided if not ann_format: # set default format ann_format = "xml" @@ -73,7 +74,7 @@ def getParams(s, params): return ann_format, file_path, suffix -def saveAnnotationMask(s, params): +def saveAnnotationMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetAnnotationMask") (ann_format, file_path, suffix) = getParams(s, params) diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index 947f7c7..91e1a7e 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -1,23 +1,50 @@ +from __future__ import annotations import logging import os import numpy as np -import zlib, dill +import zlib +import dill from distutils.util import strtobool -from PIL import Image import re -from typing import Union, Tuple -#os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically -from histoqc.import_wrapper.openslide import openslide - -# there is no branch reset group in re -# compatible with the previous definition of valid input: leading zero and leading decimals are supported +from typing import Union, Tuple, cast +from histoqc.wsi_handles.base import WSIImageHandle +from histoqc.wsi_handles.constants import KEY_OPENSLIDE _REGEX_MAG = r"^(\d?\.?\d*X?)" _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE) MAG_NA = None +# todo either document or regularize the fields of BaseImage +# class BaseImageData(TypedDict): +# warnings: List[str] +# output: List +# filename: str +# comments: str +# outdir: str +# dir: str +# # width, height +# image_base_size: Tuple[int, int] +# enable_bounding_box: bool +# image_work_size: str | float +# mask_statistics: str +# base_mag: Optional[float] +# img_mask_use: np.ndarray +# img_mask_force: List[str] +# completed: List[str] +# img_bbox: Tuple[int, int, int, int] + class BaseImage(dict): + __image_handle: WSIImageHandle + + @property + def image_handle(self) -> WSIImageHandle: + return self.__image_handle + + @image_handle.setter + def image_handle(self, image_handle: WSIImageHandle): + self.__image_handle = image_handle + def __init__(self, fname, fname_outdir, params): dict.__init__(self) @@ -33,27 +60,34 @@ def __init__(self, fname, fname_outdir, params): self["outdir"] = fname_outdir self["dir"] = os.path.dirname(fname) - self["os_handle"] = openslide.OpenSlide(fname) - self["image_base_size"] = self["os_handle"].dimensions - self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box","False")) - # check if the bbox if doesn't have bbox set enable_bounding_box to False + # get handles from config + handles = params.get("handles", KEY_OPENSLIDE) + # dynamically load wsi image handle + self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles) + + self["image_base_size"] = self.image_handle.dimensions + self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box", "False")) + # check: if it doesn't have bbox set enable_bounding_box to False self.setBBox() self.addToPrintList("image_bounding_box", self["img_bbox"]) self["image_work_size"] = params.get("image_work_size", "1.25x") self["mask_statistics"] = params.get("mask_statistics", "relative2mask") - + self["base_mag"] = getMag(self, params) if not self["base_mag"]: - logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?") - return -1 + logging.error( + f"{self['filename']}: Has unknown or uncalculated base magnification," + f" cannot specify magnification scale! Did you try getMag?") + return self.addToPrintList("base_mag", self["base_mag"]) mask_statistics_types = ["relative2mask", "absolute", "relative2image"] - if (self["mask_statistics"] not in mask_statistics_types): + if self["mask_statistics"] not in mask_statistics_types: logging.error( - f"mask_statistic type '{self['mask_statistics']}' is not one of the 3 supported options relative2mask, absolute, relative2image!") + f"mask_statistic type '{self['mask_statistics']}'" + f" is not one of the 3 supported options relative2mask, absolute, relative2image!") exit() self["img_mask_use"] = np.ones(self.getImgThumb(self["image_work_size"]).shape[0:2], dtype=bool) @@ -63,37 +97,32 @@ def __init__(self, fname, fname_outdir, params): def __getitem__(self, key): value = super(BaseImage, self).__getitem__(key) - if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"): value = dill.loads(zlib.decompress(value)) return value def __setitem__(self, key, value): - if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"): value = zlib.compress(dill.dumps(value), level=5) - return super(BaseImage, self).__setitem__(key,value) + return super(BaseImage, self).__setitem__(key, value) # setbounding box start coordinate and size def setBBox(self): # add self["img_bbox"] = (x, y, width, heigh) - osh = self["os_handle"] + image_handle = self.image_handle # set default bbox - (dim_width, dim_height) = osh.dimensions + (dim_width, dim_height) = image_handle.dimensions self["img_bbox"] = (0, 0, dim_width, dim_height) # try to get bbox if bounding_box is ture - if self["enable_bounding_box"]: - # try get bbox from os handle properties - try: - x = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA')) - y = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA')) - width = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA')) - height = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA')) - self["img_bbox"] = (x, y, width, height) - except: - # no bbox info in slide set enable_bounding_box as Flase - self["enable_bounding_box"] = False - logging.warning(f"{self['filename']}: Bounding Box requested but could not read") - self["warnings"].append("Bounding Box requested but could not read") + + # Does WSI have bounding box + if self["enable_bounding_box"] and image_handle.has_bounding_box: + self["img_bbox"] = image_handle.bounding_box + elif self["enable_bounding_box"] and not image_handle.has_bounding_box: + self["enable_bounding_box"] = False + logging.warning(f"{self['filename']}: Bounding Box requested but could not read") + self["warnings"].append("Bounding Box requested but could not read") def addToPrintList(self, name, val): self[name] = val @@ -101,14 +130,14 @@ def addToPrintList(self, name, val): # find the next higher level by giving a downsample factor # return (level, isFindCloseLevel) - def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: - osh = self["os_handle"] - relative_down_factors_idx=[np.isclose(i/downsample_factor,1,atol=.01) for i in osh.level_downsamples] - level=np.where(relative_down_factors_idx)[0] + def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: + osh = self.image_handle + relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in osh.level_downsamples] + level = np.where(relative_down_factors_idx)[0] if level.size: - return (level[0], True) + return level[0], True else: - return (osh.get_best_level_for_downsample(downsample_factor), False) + return osh.get_best_level_for_downsample(downsample_factor), False @staticmethod def is_valid_size(size: str): @@ -133,9 +162,9 @@ def getImgThumb(self, size: str): # return the img if it exists if key in self: return self[key] - + # get open slide handle - osh = self["os_handle"] + image_handle = self.image_handle # get the size of view on current img - the current size of view by using the bounding box. # bounding box could be the size of whole img or read the size from the slide mate data. @@ -157,11 +186,12 @@ def getImgThumb(self, size: str): # magnification base_mag = self["base_mag"] target_sampling_factor = base_mag / target_mag - target_dims = tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int)) - + target_dims = cast(Tuple[int, int], + tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int))) + # generate the thumb img - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) - + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) + # the size of the img is number elif size.replace(".", "0", 1).isdigit(): size = float(size) @@ -169,30 +199,33 @@ def getImgThumb(self, size: str): if size < 1: target_downscaling_factor = size target_sampling_factor = 1 / target_downscaling_factor - target_dims = tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int)) - + target_dims = cast(Tuple[int, int], + tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int))) + # generate the thumb img - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) # specifies a desired level of open slide elif size < 100: target_level = int(size) - if target_level >= osh.level_count: - target_level = osh.level_count - 1 - msg = f"Desired Image Level {size+1} does not exist! Instead using level {osh.level_count-1}! Downstream output may not be correct" - logging.error(f"{self['filename']}: {msg}" ) + if target_level >= image_handle.level_count: + target_level = image_handle.level_count - 1 + msg = (f"Desired Image Level {size + 1} does not exist!" + f" Instead using level {image_handle.level_count - 1}! Downstream output may not be correct") + logging.error(f"{self['filename']}: {msg}") self["warnings"].append(msg) - size = (tuple((np.array(img_base_size)/osh.level_downsamples[target_level]).astype(int)) + size = (tuple((np.array(img_base_size) / image_handle.level_downsamples[target_level]).astype(int)) if self["enable_bounding_box"] - else osh.level_dimensions[target_level]) + else image_handle.level_dimensions[target_level]) logging.info( - f"{self['filename']} - \t\tloading image from level {target_level} of size {osh.level_dimensions[target_level]}") - tile = osh.read_region((bx, by), target_level, size) - self[key] = (np.asarray(rgba2rgb(self, tile)) - if np.shape(tile)[-1]==4 - else np.asarray(tile)) - - # specifies a desired size of thumbnail + f"{self['filename']} - \t\tloading image from level {target_level} of size" + f" {image_handle.level_dimensions[target_level]}") + tile = image_handle.read_region((bx, by), target_level, size) + self[key] = (np.asarray(self.image_handle.backend_rgba2rgb(tile)) + if np.shape(tile)[-1] == 4 + else np.asarray(tile)) + + # specifies a desired size of thumbnail else: # recommend having the dimension is less than 10k if size > 10000: @@ -202,89 +235,19 @@ def getImgThumb(self, size: str): self["warnings"].append(msg) target_dims = getDimensionsByOneDim(self, int(size)) target_sampling_factor = img_base_size[0] / target_dims[0] - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) return self[key] -def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float): - osh = s["os_handle"] - - # get thumb from og - if not s["enable_bounding_box"]: - max_dim = dims[0] if dims[0] > dims[1] else dims[1] - return np.array(osh.get_thumbnail((max_dim, max_dim))) - - (level, isExactLevel) = s.getBestLevelForDownsample(target_sampling_factor) - - # check if get the existing level - if isExactLevel: - tile = osh.read_region((x, y), level, dims) - return np.asarray(rgba2rgb(s, tile)) if np.shape(tile)[-1]==4 else np.asarray(tile) - # scale down the thumb img from the next high level - else: - return resizeTileDownward(s, target_sampling_factor, level) - -''' -the followings are helper functions -''' -def resizeTileDownward(self, target_downsampling_factor, level): - osh = self["os_handle"] - (bx, by, bwidth, bheight) = self["img_bbox"] - end_x = bx + bwidth - end_y = by + bheight - - cloest_downsampling_factor = osh.level_downsamples[level] - win_size = 2048 - - # create a new img - output = [] - for x in range(bx, end_x, win_size): - row_piece = [] - for y in range(by, end_y, win_size): - win_width, win_height = [win_size] * 2 - # Adjust extraction size for endcut - if end_x < x + win_width: - win_width = end_x - x - if end_y < y + win_height: - win_height = end_y - y - - - win_down_width = int(round(win_width / target_downsampling_factor)) - win_down_height = int(round(win_height / target_downsampling_factor)) - - win_width = int(round(win_width / cloest_downsampling_factor)) - win_height = int(round(win_height / cloest_downsampling_factor)) - - # TODO Note: this isn't very efficient, and if more efficiency isneeded - # We should likely refactor using "paste" from Image. - # Or even just set the pixels directly with indexing. - cloest_region = osh.read_region((x, y), level, (win_width, win_height)) - if np.shape(cloest_region)[-1]==4: - cloest_region = rgba2rgb(self, cloest_region) - target_region = cloest_region.resize((win_down_width, win_down_height)) - row_piece.append(target_region) - row_piece = np.concatenate(row_piece, axis=0) - - output.append(row_piece) - output = np.concatenate(output, axis=1) - return output - - -def rgba2rgb(s: BaseImage, img): - bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff") - thumb = Image.new("RGB", img.size, bg_color) - thumb.paste(img, None, img) - return thumb - - -def printMaskHelper(type: str, prev_mask, curr_mask): - if type == "relative2mask": + +def printMaskHelper(statistic_type: str, prev_mask, curr_mask): + if statistic_type == "relative2mask": if len(prev_mask.nonzero()[0]) == 0: return str(-100) else: return str(1 - len(curr_mask.nonzero()[0]) / len(prev_mask.nonzero()[0])) - elif type == "relative2image": + elif statistic_type == "relative2image": return str(len(curr_mask.nonzero()[0]) / np.prod(curr_mask.shape)) - elif type == "absolute": + elif statistic_type == "absolute": return str(len(curr_mask.nonzero()[0])) else: return str(-1) @@ -306,29 +269,23 @@ def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]: return MAG_NA # regex determines X must either be abscent or at the end of the string if "X" in mag.upper(): - mag = mag[0:-1] + mag = mag[0: -1] return float(mag) # this function is seperated out because in the future we hope to have automatic detection of # magnification if not present in open slide, and/or to confirm openslide base magnification def getMag(s: BaseImage, params) -> Union[float, None]: - logging.info(f"{s['filename']} - \tgetMag") - osh = s["os_handle"] - mag = osh.properties.get("openslide.objective-power") or \ - osh.properties.get("aperio.AppMag") or MAG_NA - # if mag or strtobool(params.get("confirm_base_mag", "False")): - # # do analysis work here - # logging.warning(f"{s['filename']} - Unknown base magnification for file") - # s["warnings"].append(f"{s['filename']} - Unknown base magnification for file") - # return None - # else: + osh = s.image_handle + mag = osh.magnification or MAG_NA # workaround for unspecified mag -- with or without automatic detection it might be preferred to have # mag predefined mag = mag or parsed_mag(params.get("base_mag")) # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise # the exception here. - return float(mag) if mag is not MAG_NA else MAG_NA + mag = float(mag) if mag is not MAG_NA else MAG_NA + logging.info(f"{s['filename']} - \tgetMag = {mag}") + return mag def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]: diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py index 418c715..4d4dbcd 100644 --- a/histoqc/BasicModule.py +++ b/histoqc/BasicModule.py @@ -2,30 +2,30 @@ import os from histoqc.BaseImage import printMaskHelper from skimage.morphology import remove_small_objects, binary_opening, disk -from skimage import io, color, img_as_ubyte +from skimage import io +from skimage.util import img_as_ubyte +from histoqc.BaseImage import BaseImage -import matplotlib.pyplot as plt - -def getBasicStats(s, params): +def getBasicStats(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetBasicStats") - osh = s["os_handle"] - s.addToPrintList("type", osh.properties.get("openslide.vendor", "NA")) - s.addToPrintList("levels", osh.properties.get("openslide.level-count", "NA")) - s.addToPrintList("height", osh.properties.get("openslide.level[0].height", "NA")) - s.addToPrintList("width", osh.properties.get("openslide.level[0].width", "NA")) - s.addToPrintList("mpp_x", osh.properties.get("openslide.mpp-x", "NA")) - s.addToPrintList("mpp_y", osh.properties.get("openslide.mpp-y", "NA")) - s.addToPrintList("comment", osh.properties.get("openslide.comment", "NA").replace("\n", " ").replace("\r", " ")) + osh = s.image_handle + s.addToPrintList("type", osh.vendor) + s.addToPrintList("levels", osh.level_count) + s.addToPrintList("height", osh.dimensions[1] if len(osh.dimensions) >= 2 else "NA") + s.addToPrintList("width", osh.dimensions[0] if len(osh.dimensions) >= 2 else "NA") + s.addToPrintList("mpp_x", osh.mpp_x) + s.addToPrintList("mpp_y", osh.mpp_y) + s.addToPrintList("comment", osh.comment.replace("\n", " ").replace("\r", " ")) return -def finalComputations(s, params): +def finalComputations(s: BaseImage, params): mask = s["img_mask_use"] s.addToPrintList("pixels_to_use", str(len(mask.nonzero()[0]))) -def finalProcessingSpur(s, params): +def finalProcessingSpur(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingSpur") disk_radius = int(params.get("disk_radius", "25")) selem = disk(disk_radius) @@ -43,12 +43,14 @@ def finalProcessingSpur(s, params): if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BasicModule.finalProcessingSpur" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"After BasicModule.finalProcessingSpur" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") -def finalProcessingArea(s, params): +def finalProcessingArea(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingArea") area_thresh = int(params.get("area_threshold", "1000")) mask = s["img_mask_use"] @@ -66,6 +68,8 @@ def finalProcessingArea(s, params): if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BasicModule.finalProcessingArea" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"After BasicModule.finalProcessingArea" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py index 8c55890..9900780 100644 --- a/histoqc/BlurDetectionModule.py +++ b/histoqc/BlurDetectionModule.py @@ -2,21 +2,18 @@ import os import skimage -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte, morphology, measure +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io, morphology, measure +from skimage.util import img_as_ubyte from skimage.color import rgb2gray -from skimage.filters import rank import numpy as np -import matplotlib.pyplot as plt - - # Analysis of focus measure operators for shape-from-focus # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012 # https://pdfs.semanticscholar.org/8c67/5bf5b542b98bf81dcf70bd869ab52ab8aae9.pdf -def identifyBlurryRegions(s, params): +def identifyBlurryRegions(s: BaseImage, params): logging.info(f"{s['filename']} - \tidentifyBlurryRegions") blur_radius = int(params.get("blur_radius", 7)) @@ -27,8 +24,9 @@ def identifyBlurryRegions(s, params): img_laplace = np.abs(skimage.filters.laplace(img)) mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold - mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, - 1] # for some reason resize takes a grayscale and produces a 3chan + # for some reason resize takes a grayscale and produces a 3chan + mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, 1] + mask = s["img_mask_use"] & (mask > 0) io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask)) @@ -46,20 +44,19 @@ def identifyBlurryRegions(s, params): else: nobj = area_max = area_mean = 0 - s.addToPrintList("blurry_removed_num_regions", str(nobj)) s.addToPrintList("blurry_removed_mean_area", str(area_mean)) s.addToPrintList("blurry_removed_max_area", str(area_max)) - s.addToPrintList("blurry_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions " + f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") - + f"After BlurDetectionModule.identifyBlurryRegions" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") return diff --git a/histoqc/BrightContrastModule.py b/histoqc/BrightContrastModule.py index 89dbfbd..df98aec 100644 --- a/histoqc/BrightContrastModule.py +++ b/histoqc/BrightContrastModule.py @@ -3,9 +3,10 @@ from skimage.filters import sobel from skimage.color import convert_colorspace, rgb2gray from distutils.util import strtobool +from histoqc.BaseImage import BaseImage -def getBrightnessGray(s, params): +def getBrightnessGray(s: BaseImage, params): prefix = params.get("prefix", None) prefix = prefix+"_" if prefix else "" logging.info(f"{s['filename']} - \tgetContrast:{prefix}") @@ -31,7 +32,7 @@ def getBrightnessGray(s, params): return -def getBrightnessByChannelinColorSpace(s, params): +def getBrightnessByChannelinColorSpace(s: BaseImage, params): prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -43,17 +44,16 @@ def getBrightnessByChannelinColorSpace(s, params): invert = strtobool(params.get("invert", "False")) - img = s.getImgThumb(s["image_work_size"]) suffix = "" - if (to_color_space != "RGB"): + if to_color_space != "RGB": img = convert_colorspace(img, "RGB", to_color_space) suffix = "_" + to_color_space for chan in range(0, 3): vals = img[:, :, chan] - if (limit_to_mask): + if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] vals = vals[mask] @@ -67,7 +67,7 @@ def getBrightnessByChannelinColorSpace(s, params): return -def getContrast(s, params): +def getContrast(s: BaseImage, params): prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -77,10 +77,9 @@ def getContrast(s, params): invert = strtobool(params.get("invert", "False")) - img = s.getImgThumb(s["image_work_size"]) img = rgb2gray(img) - + # noinspection PyTypeChecker sobel_img = sobel(img) ** 2 if limit_to_mask: @@ -90,14 +89,13 @@ def getContrast(s, params): sobel_img = sobel_img[mask] img = img[s["img_mask_use"]] - if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to - # produce metrics for completeness with warning + if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to + # produce metrics for completeness with warning - s.addToPrintList(f"{prefix}tenenGrad_contrast", str(-100)) + s.addToPrintList(f"{prefix}tenen_grad_contrast", str(-100)) s.addToPrintList(f"{prefix}michelson_contrast", str(-100)) s.addToPrintList(f"{prefix}rms_contrast", str(-100)) - logging.warning(f"{s['filename']} - After BrightContrastModule.getContrast: NO tissue " f"detected, statistics are impossible to compute, defaulting to -100 !") s["warnings"].append(f"After BrightContrastModule.getContrast: NO tissue remains " @@ -105,10 +103,9 @@ def getContrast(s, params): return - # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask - tenenGrad_contrast = np.sqrt(np.sum(sobel_img)) / img.size - s.addToPrintList(f"{prefix}tenenGrad_contrast", str(tenenGrad_contrast)) + tenen_grad_contrast = np.sqrt(np.sum(sobel_img)) / img.size + s.addToPrintList(f"{prefix}tenen_grad_contrast", str(tenen_grad_contrast)) # Michelson contrast max_img = img.max() diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py index afd62f5..e5d2249 100644 --- a/histoqc/BubbleRegionByRegion.py +++ b/histoqc/BubbleRegionByRegion.py @@ -1,50 +1,35 @@ import logging import os -import sys -from ast import literal_eval as make_tuple - -from distutils.util import strtobool from histoqc.BaseImage import printMaskHelper +from scipy.signal import convolve2d -import scipy.signal - -from skimage import io, img_as_ubyte -from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from skimage.util import img_as_ubyte +from skimage.filters import frangi from skimage.color import rgb2gray -from skimage.morphology import remove_small_objects, disk, binary_opening -from skimage.feature import local_binary_pattern - -from skimage.transform import rescale, resize, downscale_local_mean - -from math import ceil - -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier - +from skimage.morphology import remove_small_objects +from histoqc.BaseImage import BaseImage from skimage import io, color - - import numpy as np -import matplotlib.pyplot as plt global_holder = {} -#WARNING: Not as robust as other modules -def roiWise(s, params): + +# WARNING: Not as robust as other modules +def roiWise(s: BaseImage, params): name = params.get("name", "classTask") print("\tpixelWise:\t", name, end="") level = int(params.get("level", 1)) - win_size = int(params.get("win_size", 2048)) #the size of the ROI which will be iteratively considered + win_size = int(params.get("win_size", 2048)) # the size of the ROI which will be iteratively considered - osh = s["os_handle"] + osh = s.image_handle dim_base = osh.level_dimensions[0] dims = osh.level_dimensions[level] - ratio_x = dim_base[0] / dims[0] #figure out the difference between desi + ratio_x = dim_base[0] / dims[0] # figure out the difference between desi ratio_y = dim_base[1] / dims[1] frangi_scale_range = (1, 6) @@ -58,13 +43,15 @@ def roiWise(s, params): row_piece = [] print('.', end='', flush=True) for y in range(0, dim_base[1], round(win_size * ratio_y)): - region = np.asarray(osh.read_region((x, y), 1, (win_size, win_size))) + region = osh.read_region((x, y), 1, (win_size, win_size)) + region = np.array(region) region = region[:, :, 0:3] # remove alpha channel g = rgb2gray(region) + # todo -- forward compatibility. Later version of frangi alters the signatures feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges) feat = feat / 8.875854409275627e-08 region_mask = np.bitwise_and(g < .3, feat > 5) - region_mask = remove_small_objects(region_mask, min_size=100, in_place=True) + region_mask = remove_small_objects(region_mask, min_size=100) # region_std = region.std(axis=2) # region_gray = rgb2gray(region) # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255) @@ -77,52 +64,54 @@ def roiWise(s, params): mask = np.concatenate(mask, axis=1) if params.get("area_threshold", "") != "": - mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), in_place=True) + # forward compatibility + # inplace=True is equivalent to out=mask. Therefore, it is removed in future version + mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), out=mask) s.addToPrintList(name, str(mask.mean())) - #TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask - #s.addToPrintList(name, - # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + # TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask + # s.addToPrintList(name, + # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), + # prev_mask, s["img_mask_use"])) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) #.astype(np.uint8) * 255) + # .astype(np.uint8) * 255) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) return -def detectSmoothness(s, params): - logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") - thresh = float(params.get("threshold", ".01" )) - kernel_size = int(params.get("kernel_size", "10")) - min_object_size = int(params.get("min_object_size", "100")) +def detectSmoothness(s: BaseImage, params): + logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") + thresh = float(params.get("threshold", ".01")) + kernel_size = int(params.get("kernel_size", "10")) + min_object_size = int(params.get("min_object_size", "100")) + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) - img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) + imf = convolve2d(img, avg, mode="same") + mask_flat = abs(imf - img) < thresh - imf = scipy.signal.convolve2d(img, avg, mode="same") - mask_flat = abs(imf - img) < thresh + mask_flat = remove_small_objects(mask_flat, min_size=min_object_size) + mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size) - mask_flat = remove_small_objects(mask_flat, min_size=min_object_size) - mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size) + prev_mask = s["img_mask_use"] + s["img_mask_flat"] = mask_flat - prev_mask = s["img_mask_use"] - s["img_mask_flat"] = mask_flat + io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask)) + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] + s.addToPrintList("flat_areas", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, + s["img_mask_use"])) + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") - s.addToPrintList("flat_areas", - printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, - s["img_mask_use"])) - - if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty - logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue " - f"remains detectable! Downstream modules likely to be incorrect/fail") - s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " - f"detectable! Downstream modules likely to be incorrect/fail") - - return + return diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 9567a49..fdbe039 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -7,8 +7,9 @@ from distutils.util import strtobool -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte, img_as_bool +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io +from skimage.util import img_as_ubyte, img_as_bool from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace from skimage.color import rgb2gray from skimage.morphology import remove_small_objects, disk, dilation @@ -21,10 +22,8 @@ import numpy as np -import matplotlib.pyplot as plt - -def pixelWise(s, params): +def pixelWise(s: BaseImage, params): name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tpixelWise:\t", name) @@ -34,7 +33,7 @@ def pixelWise(s, params): if fname == "": logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name) sys.exit(1) - return + model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1) img = s.getImgThumb(s["image_work_size"]) @@ -90,15 +89,19 @@ def compute_gaussian(img, params): gaussian_sigma = int(params.get("gaussian_sigma", 1)) gaussian_multichan = strtobool(params.get("gaussian_multichan", False)) - if (gaussian_multichan): - return gaussian(img, sigma=gaussian_sigma, multichannel=gaussian_multichan) + # todo: forward compatibility + # todo: after 0.19 default multichannel behavior is fixed and explicitly setting channel_axis is preferred. + # todo: multichannel is also deprecated in later versions + if gaussian_multichan: + return gaussian(img, sigma=gaussian_sigma, channel_axis=-1) else: return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None] def compute_median(img, params): median_disk_size = int(params.get("median_disk_size", 3)) - return median(rgb2gray(img), selem=disk(median_disk_size))[:, :, None] + # starting from 0.19, selem is deprecated and footprint is preferred. + return median(rgb2gray(img), footprint=disk(median_disk_size))[:, :, None] def compute_gabor(img, params): @@ -132,7 +135,8 @@ def compute_frangi(img, params): frangi_beta1 = float(params.get("frangi_beta1", .5)) frangi_beta2 = float(params.get("frangi_beta2", 15)) frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True")) - feat = frangi(rgb2gray(img), scale_range = frangi_scale_range, scale_step =frangi_scale_step, beta =frangi_beta1, gamma=frangi_beta2, black_ridges =frangi_black_ridges) + sigmas = frangi_scale_range + (frangi_scale_step,) + feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, black_ridges=frangi_black_ridges) return feat[:, :, None] # add singleton dimension @@ -147,7 +151,7 @@ def compute_features(img, params): return np.concatenate(feats, axis=2) -def byExampleWithFeatures(s, params): +def byExampleWithFeatures(s: BaseImage, params): name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}") @@ -158,12 +162,10 @@ def byExampleWithFeatures(s, params): if examples == "": logging.error(f"{s['filename']} - No examples provided in ClassificationModule.byExample for {name} !!") sys.exit(1) - return if params.get("features", "") == "": logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!") sys.exit(1) - return with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model # then it is shared with all other modules @@ -192,13 +194,13 @@ def byExampleWithFeatures(s, params): mask = mask.reshape(-1, 1) - if nsamples_per_example != -1: #sub sambling required - nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]*nsamples_per_example) + if nsamples_per_example != -1: # sub sampling required + nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0] + * nsamples_per_example) idxkeep = np.random.choice(mask.shape[0], size=int(nitems)) eximg = eximg[idxkeep, :] mask = mask[idxkeep] - model_vals.append(eximg) model_labels = np.vstack((model_labels, mask)) @@ -216,14 +218,14 @@ def byExampleWithFeatures(s, params): cal = cal.reshape(img.shape[0], img.shape[1], 2) mask = cal[:, :, 1] > thresh - area_thresh = int(params.get("area_threshold", "5")) if area_thresh > 0: - mask = remove_small_objects(mask, min_size=area_thresh, in_place=True) + # inplace=True is redundant and deprecated. + mask = remove_small_objects(mask, min_size=area_thresh, out=mask) dilate_kernel_size = int(params.get("dilate_kernel_size", "0")) if dilate_kernel_size > 0: - mask = dilation(mask, selem=np.ones((dilate_kernel_size, dilate_kernel_size))) + mask = dilation(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size))) mask = s["img_mask_use"] & (mask > 0) diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py index dbb41f1..bff440b 100644 --- a/histoqc/DeconvolutionModule.py +++ b/histoqc/DeconvolutionModule.py @@ -2,18 +2,15 @@ import os import sys import numpy as np -from skimage import io, color, img_as_ubyte -from skimage.exposure import rescale_intensity +from skimage import io +from skimage.util import img_as_ubyte +from histoqc.BaseImage import BaseImage from skimage.color import separate_stains -from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb -from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \ - hpx_from_rgb # need to load all of these in case the user selects them from distutils.util import strtobool +from histoqc.import_wrapper import dynamic_import -import matplotlib.pyplot as plt - -def separateStains(s, params): +def separateStains(s: BaseImage, params): logging.info(f"{s['filename']} - \tseparateStains") stain = params.get("stain", "") use_mask = strtobool(params.get("use_mask", "True")) @@ -21,25 +18,23 @@ def separateStains(s, params): if stain == "": logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains") sys.exit(1) - return - - stain_matrix = getattr(sys.modules[__name__], stain, None) - if stain_matrix is None: + try: + stain_matrix = dynamic_import("skimage.color", stain, return_first=True) + except ImportError: logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") sys.exit(1) - return mask = s["img_mask_use"] - if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early + if use_mask and len(mask.nonzero()[0]) == 0: # -- lets just error check at the top if mask is empty and abort early for c in range(3): s.addToPrintList(f"deconv_c{c}_std", str(-100)) s.addToPrintList(f"deconv_c{c}_mean", str(-100)) io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " - f"remains detectable! Saving Black images") + f"remains detectable! Saving Black images") s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue " f"remains detectable! Saving Black images") @@ -54,7 +49,6 @@ def separateStains(s, params): clip_max_val = np.quantile(dc.flatten(), .99) dc = np.clip(dc, a_min=0, a_max=clip_max_val) - if use_mask: dc_sub = dc[mask] dc_min = dc_sub.min() diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py index e58707c..a57122f 100644 --- a/histoqc/HistogramModule.py +++ b/histoqc/HistogramModule.py @@ -4,11 +4,12 @@ from skimage import io import matplotlib.pyplot as plt from distutils.util import strtobool +from histoqc.BaseImage import BaseImage +# this holds a local copy of the histograms of the template images so that they need only be computed once +global_holder = {} -global_holder = {} #this holds a local copy of the histograms of the template images so that they need only be computed once - -def getHistogram(s, params): +def getHistogram(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetHistogram") limit_to_mask = strtobool(params.get("limit_to_mask", True)) bins = int(params.get("bins", 20)) @@ -35,19 +36,20 @@ def computeHistogram(img, bins, mask=-1): result = np.zeros(shape=(bins, 3)) for chan in range(0, 3): vals = img[:, :, chan].flatten() - if (isinstance(mask, np.ndarray)): + if isinstance(mask, np.ndarray): vals = vals[mask.flatten()] - result[:, chan] = np.histogram(vals, bins=bins, density=True, range=[0, 255])[0] + result[:, chan] = np.histogram(vals, bins=bins, density=True, range=(0, 255))[0] return result -def compareToTemplates(s, params): +def compareToTemplates(s: BaseImage, params): logging.info(f"{s['filename']} - \tcompareToTemplates") bins = int(params.get("bins", 20)) limit_to_mask = strtobool(params.get("limit_to_mask", True)) - if (not global_holder.get("templates", False)): #if the histograms haven't already been computed, compute and store them now + # if the histograms haven't already been computed, compute and store them now + if not global_holder.get("templates", False): templates = {} for template in params["templates"].splitlines(): templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins) @@ -56,16 +58,16 @@ def compareToTemplates(s, params): img = s.getImgThumb(s["image_work_size"]) - if (limit_to_mask): + if limit_to_mask: mask = s["img_mask_use"] if len(mask.nonzero()[0]) == 0: logging.warning(f"{s['filename']} - HistogramModule.compareToTemplates NO tissue " f"remains detectable in mask!") s["warnings"].append(f"HistogramModule.compareToTemplates NO tissue " - f"remains detectable in mask!") + f"remains detectable in mask!") - imghst = np.zeros((bins,3)) + imghst = np.zeros((bins, 3)) else: imghst = computeHistogram(img, bins, mask) diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py index 1f8d306..cf3290a 100644 --- a/histoqc/LightDarkModule.py +++ b/histoqc/LightDarkModule.py @@ -1,23 +1,21 @@ import logging import os import numpy as np -from histoqc.BaseImage import printMaskHelper -from skimage import io, color, img_as_ubyte +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io, color +from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage.filters import threshold_otsu, rank from skimage.morphology import disk from sklearn.cluster import KMeans from skimage import exposure -import matplotlib.pyplot as plt - - -def getIntensityThresholdOtsu(s, params): +def getIntensityThresholdOtsu(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu") name = params.get("name", "classTask") local = strtobool(params.get("local", "False")) - radius = float(params.get("radius", 15)) + radius = int(params.get("radius", 15)) selem = disk(radius) img = s.getImgThumb(s["image_work_size"]) @@ -28,9 +26,9 @@ def getIntensityThresholdOtsu(s, params): else: thresh = threshold_otsu(img) - map = img < thresh + region_below_thresh = img < thresh - s["img_mask_" + name] = map > 0 + s["img_mask_" + name] = region_below_thresh > 0 if strtobool(params.get("invert", "False")): s["img_mask_" + name] = ~s["img_mask_" + name] @@ -51,10 +49,9 @@ def getIntensityThresholdOtsu(s, params): return -def getIntensityThresholdPercent(s, params): +def getIntensityThresholdPercent(s: BaseImage, params): name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}") - lower_thresh = float(params.get("lower_threshold", "-inf")) upper_thresh = float(params.get("upper_threshold", "inf")) @@ -70,13 +67,11 @@ def getIntensityThresholdPercent(s, params): map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std) img = color.rgb2gray(img) - map = np.bitwise_and(img > lower_thresh, img < upper_thresh) - - map = np.bitwise_and(map, map_std) - - s["img_mask_" + name] = map > 0 + region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh) + region_between_interval = np.bitwise_and(region_between_interval, map_std) + s["img_mask_" + name] = region_between_interval > 0 if strtobool(params.get("invert", "False")): s["img_mask_" + name] = ~s["img_mask_" + name] @@ -84,7 +79,8 @@ def getIntensityThresholdPercent(s, params): prev_mask = s["img_mask_use"] s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(prev_mask & ~s["img_mask_" + name])) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", + img_as_ubyte(prev_mask & ~s["img_mask_" + name])) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -98,11 +94,7 @@ def getIntensityThresholdPercent(s, params): return - - - - -def removeBrightestPixels(s, params): +def removeBrightestPixels(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels") # lower_thresh = float(params.get("lower_threshold", -float("inf"))) @@ -115,13 +107,13 @@ def removeBrightestPixels(s, params): img = color.rgb2gray(img) kmeans = KMeans(n_clusters=3, n_init=1).fit(img.reshape([-1, 1])) + # noinspection PyUnresolvedReferences brightest_cluster = np.argmax(kmeans.cluster_centers_) + # noinspection PyUnresolvedReferences darkest_point_in_brightest_cluster = (img.reshape([-1, 1])[kmeans.labels_ == brightest_cluster]).min() s["img_mask_bright"] = img > darkest_point_in_brightest_cluster - - if strtobool(params.get("invert", "False")): s["img_mask_bright"] = ~s["img_mask_bright"] @@ -142,8 +134,7 @@ def removeBrightestPixels(s, params): return - -def minimumPixelIntensityNeighborhoodFiltering(s,params): +def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering") disk_size = int(params.get("disk_size", 10000)) threshold = int(params.get("upper_threshold", 200)) @@ -156,7 +147,6 @@ def minimumPixelIntensityNeighborhoodFiltering(s,params): imgfilt = rank.minimum(img, selem) s["img_mask_bright"] = imgfilt > threshold - if strtobool(params.get("invert", "True")): s["img_mask_bright"] = ~s["img_mask_bright"] @@ -176,7 +166,8 @@ def minimumPixelIntensityNeighborhoodFiltering(s,params): return -def saveEqualisedImage(s,params): + +def saveEqualisedImage(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage") img = s.getImgThumb(s["image_work_size"]) diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py index 6d3733f..610141c 100644 --- a/histoqc/LocalTextureEstimationModule.py +++ b/histoqc/LocalTextureEstimationModule.py @@ -1,10 +1,8 @@ import logging import numpy as np -from skimage import color +from skimage import color from distutils.util import strtobool from skimage.feature import graycomatrix, graycoprops -import matplotlib.pyplot as plt - def estimateGreyComatrixFeatures(s, params): @@ -15,17 +13,17 @@ def estimateGreyComatrixFeatures(s, params): patch_size = int(params.get("patch_size", 32)) npatches = int(params.get("npatches", 100)) nlevels = int(params.get("nlevels", 8)) - feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') + feats = params.get("feats", "contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') invert = strtobool(params.get("invert", "False")) - mask_name = params.get("mask_name","img_mask_use") - + mask_name = params.get("mask_name", "img_mask_use") img = s.getImgThumb(s["image_work_size"]) img = color.rgb2gray(img) mask = s[mask_name] if not invert else ~s[mask_name] if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask - msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask" + msg = (f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}" + f" Can not estimate the empty mask since NO tissue remains detectable in mask") logging.warning(f"{s['filename']} - {msg}") s["warnings"].append(msg) return @@ -36,10 +34,10 @@ def estimateGreyComatrixFeatures(s, params): results = [] - for id in idx: - r, c = maskidx[id, :] + for index in idx: + r, c = maskidx[index, :] patch = img[r:r + patch_size, c:c + patch_size] - glcm = graycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5], + glcm = graycomatrix(np.digitize(patch, np.linspace(0, 1, num=nlevels), right=True), distances=[5], angles=[0], levels=nlevels, symmetric=True, normed=True) results.append([graycoprops(glcm, prop=feat) for feat in feats]) diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py index 68e8437..d660cfe 100644 --- a/histoqc/MorphologyModule.py +++ b/histoqc/MorphologyModule.py @@ -2,12 +2,10 @@ import os import numpy as np from histoqc.BaseImage import printMaskHelper -from skimage import io, morphology, img_as_ubyte, measure - +from skimage import io, morphology, measure +from skimage.util import img_as_ubyte from scipy import ndimage as ndi - -import matplotlib.pyplot as plt # these 2 are used for debugging -from histoqc.SaveModule import blend2Images #for easier debugging +from typing import cast def removeSmallObjects(s, params): @@ -22,7 +20,6 @@ def removeSmallObjects(s, params): prev_mask = s["img_mask_use"] s["img_mask_use"] = img_reduced - rps = measure.regionprops(morphology.label(img_small)) if rps: areas = np.asarray([rp.area for rp in rps]) @@ -36,14 +33,9 @@ def removeSmallObjects(s, params): s.addToPrintList("small_tissue_removed_mean_area", str(area_mean)) s.addToPrintList("small_tissue_removed_max_area", str(area_max)) - - - - s.addToPrintList("small_tissue_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) - if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning(f"{s['filename']} - After MorphologyModule.removeSmallObjects: NO tissue " f"remains detectable! Downstream modules likely to be incorrect/fail") @@ -58,8 +50,8 @@ def remove_large_objects(img, max_size): selem = ndi.generate_binary_structure(img.ndim, 1) ccs = np.zeros_like(img, dtype=np.int32) ndi.label(img, selem, output=ccs) - component_sizes = np.bincount(ccs.ravel()) - too_big = component_sizes > max_size + component_sizes: np.ndarray = np.bincount(ccs.ravel()) + too_big: np.ndarray = cast(np.ndarray, component_sizes > max_size) too_big_mask = too_big[ccs] img_out = img.copy() img_out[too_big_mask] = 0 @@ -76,7 +68,7 @@ def removeFatlikeTissue(s, params): img_small = img_reduced & np.invert(s["img_mask_use"]) img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9) - mask_dilate = morphology.dilation(img_small, selem=np.ones((kernel_size, kernel_size))) + mask_dilate = morphology.dilation(img_small, footprint=np.ones((kernel_size, kernel_size))) mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size) mask_fat = mask_dilate & ~mask_dilate_removed @@ -100,8 +92,6 @@ def removeFatlikeTissue(s, params): s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean)) s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max)) - - s.addToPrintList("fatlike_tissue_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -118,7 +108,6 @@ def fillSmallHoles(s, params): img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size) img_small = img_reduced & np.invert(s["img_mask_use"]) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small)) s["img_mask_small_removed"] = (img_small * 255) > 0 diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py index 627f8ca..cd04be5 100644 --- a/histoqc/SaveModule.py +++ b/histoqc/SaveModule.py @@ -1,17 +1,17 @@ import logging import os -from skimage import io, img_as_ubyte +from skimage import io +from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage import color import numpy as np - -import matplotlib.pyplot as plt +from histoqc.BaseImage import BaseImage def blend2Images(img, mask): - if (img.ndim == 3): + if img.ndim == 3: img = color.rgb2gray(img) - if (mask.ndim == 3): + if mask.ndim == 3: mask = color.rgb2gray(mask) img = img[:, :, None] * 1.0 # can't use boolean mask = mask[:, :, None] * 1.0 @@ -19,7 +19,7 @@ def blend2Images(img, mask): return out -def saveFinalMask(s, params): +def saveFinalMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveUsableRegion") mask = s["img_mask_use"] @@ -36,37 +36,42 @@ def saveFinalMask(s, params): return -def saveAssociatedImage(s, key:str, dim:int): +def saveAssociatedImage(s: BaseImage, key: str, dim: int): logging.info(f"{s['filename']} - \tsave{key.capitalize()}") - osh = s["os_handle"] + image_handle = s.image_handle - if not key in osh.associated_images: + if key not in image_handle.associated_images: message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images" logging.warning(message) s["warnings"].append(message) return # get asscociated image by key - associated_img = osh.associated_images[key] - (width, height) = associated_img.size - - # calulate the width or height depends on dim - if width > height: - h = round(dim * height / width) - size = (dim, h) - else: - w = round(dim * width / height) - size = (w, dim) - - associated_img = associated_img.resize(size) - associated_img = np.asarray(associated_img)[:, :, 0:3] - io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img) + associated_img = image_handle.associated_images[key] + width, height = image_handle.__class__.backend_dim(associated_img) + + if width * height == 0: + message = f"{s['filename']}- Irregular Size {width, height} of the Associated Images: {key}" + logging.warning(message) + s["warnings"].append(message) + return + + aspect_ratio = width / height + size = image_handle.__class__.curate_to_max_dim(width, height, max_size=dim, aspect_ratio=aspect_ratio) + # to pillow handle + associated_img = image_handle.__class__.backend_to_pil(associated_img) + # resize the pil (RGB) + associated_img = associated_img.resize(size).convert("RGB") + # save the pil + associated_img.save(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png") + def saveMacro(s, params): dim = params.get("small_dim", 500) saveAssociatedImage(s, "macro", dim) return - + + def saveMask(s, params): logging.info(f"{s['filename']} - \tsaveMaskUse") suffix = params.get("suffix", None) @@ -80,6 +85,7 @@ def saveMask(s, params): # save mask io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"])) + def saveThumbnails(s, params): logging.info(f"{s['filename']} - \tsaveThumbnail") # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py index 0101ca4..07bf184 100644 --- a/histoqc/TileExtractionModule.py +++ b/histoqc/TileExtractionModule.py @@ -4,7 +4,6 @@ are open. """ import os -import openslide import json from histoqc.BaseImage import BaseImage from typing import Callable, Dict, Any, List, Tuple, Union @@ -14,7 +13,8 @@ from contextlib import contextmanager from distutils.util import strtobool import logging -from histoqc.import_wrapper.typing import Literal, get_args +from typing_extensions import Literal, get_args +from PIL.Image import Image as PILImage # from histoqc.import_wrapper.helper import dynamic_import # __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] # Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") @@ -269,10 +269,10 @@ def max_tile_bbox_top_left_coord(rp_bbox: TYPE_BBOX_INT, work_tile_size: float, tile_max_left = left_rp + max_step_horiz * work_stride tile_max_top = top_rp + max_step_vert * work_stride - assert round(tile_max_left + work_tile_size) <= right_rp,\ + assert round(tile_max_left + work_tile_size) <= right_rp, \ f"left + size check" \ f" {tile_max_left + work_tile_size} = {tile_max_left} + {work_tile_size} <= {right_rp} fail" - assert round(tile_max_top + work_tile_size) <= bottom_rp,\ + assert round(tile_max_top + work_tile_size) <= bottom_rp, \ f"top + size check" \ f" {tile_max_top + work_tile_size} = {tile_max_top} + {work_tile_size} <= {bottom_rp} fail" return int(tile_max_top), int(tile_max_left) @@ -345,8 +345,6 @@ def _tile_windows_helper(mask_use_for_tiles, """ mask = mask_use_for_tiles - # image_handle: openslide.OpenSlide = s["os_handle"] - # img_w, img_h = image_handle.dimensions assert mask is not None, f"{filename}: mask is not initialized" assert isinstance(mask, np.ndarray), f"The mask is expected to be a Numpy NDArray" @@ -393,7 +391,7 @@ def tile_windows(self, root_dict = self.__tile_window_cache entry = root_dict.get(key, None) if entry is None or force_rewrite: - # img_w, img_h = s['os_handle'].dimensions + # img_w, img_h = s.image_handle.dimensions root_dict[key] = TileExtractor._tile_windows_helper(mask_use_for_tiles, img_w, img_h, tile_size, tile_stride, tissue_thresh) @@ -532,7 +530,7 @@ def valid_tile_extraction(self, tw: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h, tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite) window_list_of_regions = tw.windows_on_original_image - image_handle: openslide.OpenSlide = s["os_handle"] + image_handle = s.image_handle valid_window_list_all_regions: List[List[Tuple[int, int, int, int]]] = [] for region_windows in window_list_of_regions: region_windows: List[Tuple[int, int, int, int]] @@ -541,7 +539,7 @@ def valid_tile_extraction(self, window: Tuple[int, int, int, int] # just to make the convention clear location, size = TileExtractor.__window_convert(window) - region = image_handle.read_region(location, 0, size) + region: PILImage = image_handle.read_region(location, 0, size) tile_np = np.array(region, copy=False) valid_flag = screen_callbacks(tile_np) if not valid_flag: @@ -573,7 +571,7 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]): img_use_for_tiles = s.getImgThumb(s["image_work_size"]) mask_use_for_tiles = s['img_mask_use'] - image_handle: openslide.OpenSlide = s['os_handle'] + image_handle = s.image_handle img_w, img_h = image_handle.dimensions tile_extractor = TileExtractor(s) diff --git a/histoqc/__main__.py b/histoqc/__main__.py index c0bd0c0..415f479 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -30,7 +30,9 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(prog="histoqc", description='Run HistoQC main quality control pipeline for digital pathology images') + parser = argparse.ArgumentParser(prog="histoqc", + description='Run HistoQC main quality control pipeline' + ' for digital pathology images') parser.add_argument('input_pattern', help="input filename pattern (try: *.svs or target_path/*.svs )," " or tsv file containing list of files to analyze", @@ -76,7 +78,7 @@ def main(argv=None): lm.logger.warning(f"Configuration file not set (--config), using default") config.read_string(read_config_template('default')) elif os.path.exists(args.config): - config.read(args.config) #Will read the config file + config.read(args.config) # Will read the config file else: lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.") config.read_string(read_config_template(args.config)) @@ -223,5 +225,6 @@ def main(argv=None): ) return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py index 5012ce4..6c1a4e1 100644 --- a/histoqc/_pipeline.py +++ b/histoqc/_pipeline.py @@ -17,6 +17,8 @@ from importlib import import_module from logging.config import dictConfig from logging.handlers import QueueHandler +from typing_extensions import Literal +from typing import cast # --- logging helpers ------------------------------------------------- @@ -63,7 +65,8 @@ def setup_logging(*, capture_warnings, filter_warnings): }) # configure warnings too... - warnings.filterwarnings(filter_warnings) + filter_type = Literal["default", "error", "ignore", "always", "module", "once"] + warnings.filterwarnings(cast(filter_type, filter_warnings)) logging.captureWarnings(capture_warnings) @@ -310,7 +313,7 @@ def write_headers(self, *args): Parameters ---------- - state : dict + state: dict the current histoqc implementation writes the outputs to the header files, so *args supports `state` for now. overwrite in subclass to control header output behavior diff --git a/histoqc/_worker.py b/histoqc/_worker.py index 97e5ff6..54af16d 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -62,13 +62,9 @@ def worker(idx, file_name, *, raise exc else: - # TODO: - # the histoqc workaround below is due an implementation detail in BaseImage: - # BaseImage keeps an OpenSlide instance stored under os_handle and leaks a - # file handle. This will need fixing in BaseImage. - # -> best solution would be to make BaseImage a contextmanager and close - # and cleanup the OpenSlide handle on __exit__ - s["os_handle"] = None # need to get rid of handle because it can't be pickled + # So long as the gc is triggered to delete the handle, the close is called to release the resources, + # as documented in the openslide and cuimage's source code. + s.image_handle = None return s diff --git a/histoqc/annotations/annot_collection.py b/histoqc/annotations/annot_collection.py index 30a187c..42eab60 100644 --- a/histoqc/annotations/annot_collection.py +++ b/histoqc/annotations/annot_collection.py @@ -2,7 +2,7 @@ from types import MappingProxyType # from shapely.strtree import STRtree # from shapely.geometry import box as shapely_box -from histoqc.import_wrapper.typing import Literal, get_args +from typing_extensions import Literal, get_args from lazy_property import LazyProperty from .annotation.base import Annotation, Region, TYPE_RAW_LABEL from .annotation.imagescope import ImageScopeAnnotation diff --git a/histoqc/annotations/annotation/base.py b/histoqc/annotations/annotation/base.py index 36da725..d549e51 100644 --- a/histoqc/annotations/annotation/base.py +++ b/histoqc/annotations/annotation/base.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod -from typing import Generic, TypeVar, Dict, Union, List, Tuple, TypedDict +from typing import TypeVar, Dict, Union, List, Tuple, Generic +from typing_extensions import TypedDict from lazy_property import LazyProperty from shapely.geometry import Polygon, MultiPolygon import logging diff --git a/histoqc/annotations/annotation/geojson.py b/histoqc/annotations/annotation/geojson.py index cf5bf2e..fc1175a 100644 --- a/histoqc/annotations/annotation/geojson.py +++ b/histoqc/annotations/annotation/geojson.py @@ -1,5 +1,5 @@ from typing import List, Dict, Callable, Any # Literal, get_args -from histoqc.import_wrapper.typing import Literal, get_args +from typing_extensions import Literal, get_args from ..io_utils.json import load_json from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET diff --git a/histoqc/config/__main__.py b/histoqc/config/__main__.py index 3f2b1c0..493e09e 100644 --- a/histoqc/config/__main__.py +++ b/histoqc/config/__main__.py @@ -9,7 +9,7 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(prog="histoqc.config",description="Show example configuration files") + parser = argparse.ArgumentParser(prog="histoqc.config", description="Show example configuration files") parser.add_argument('--list', action='store_true', help='list available configs') diff --git a/histoqc/import_wrapper/__init__.py b/histoqc/import_wrapper/__init__.py index e69de29..e216514 100644 --- a/histoqc/import_wrapper/__init__.py +++ b/histoqc/import_wrapper/__init__.py @@ -0,0 +1 @@ +from .helper import dynamic_import diff --git a/histoqc/import_wrapper/helper.py b/histoqc/import_wrapper/helper.py index 7804e3a..aa97227 100644 --- a/histoqc/import_wrapper/helper.py +++ b/histoqc/import_wrapper/helper.py @@ -1,23 +1,84 @@ +from __future__ import annotations import importlib -from typing import Union +from typing import Optional, List -def dynamic_import(module_name: str, attribute_name: str, surrogate: Union[str, None]): - """ - Dynamically import the components from surrogate module if not available (e.g., `Literal` is only available in - typing from python3.8 but typing_extension provides the same functionality for python <=3.7. +def __dynamic_import(module_name: str, attribute_name: Optional[str]): + """Base function to import a module or a component from the module via importlib + Args: - module_name: - attribute_name: - surrogate: + module_name: name of the module + attribute_name: name of the attribute. If None, then returns the module itself. Returns: + imported module or component + Raises + ImportError: if the module cannot be imported or the attribute is not found. """ + assert module_name is not None and isinstance(module_name, str) module = importlib.import_module(module_name) - attribute = getattr(module, attribute_name, None) - if attribute is not None: - return attribute - if surrogate is not None: - return dynamic_import(surrogate, attribute_name, None) - raise ImportError(f"Cannot Import {attribute_name} from either {module_name} or {surrogate}") + if attribute_name is None: + return module + if not hasattr(module, attribute_name): + raise ImportError(f"'{module_name}' has no attribute '{attribute_name}'") + return getattr(module, attribute_name) + + +def __validate_names(names: str | List[str], pad_length: Optional[int] = 1) -> List[Optional[str]]: + """Validate the names provided to be not None and in form of a list. + + If the names is a str, returns a list with a singleton value of names and length of pad_length. + + Args: + names: the value to be validated + pad_length: length to + + Returns: + List of names. + """ + if isinstance(names, str) or names is None: + return [names] * pad_length + if isinstance(names, list) and len(names) == 1 and pad_length > 1: + return names * pad_length + assert isinstance(names, List), f"{type(names)}" + return names + + +def dynamic_import(module_names: List[str] | str, + attr_names: Optional[List[Optional[str]] | str], + return_first: bool): + """Dynamically import the module or attribute from the modules via importlib. + + Priority is defined by the order of module_names/attr_names. The function will continue to try to import the module + until all attempts fail and raise an ImportError in this case. + If return_first is True, the function only returns the first viable module/attribute. Otherwise, it returns the list + of all available modules/attributes. + + Args: + module_names: names of the modules. If it is str it will be padded to a list of single element: [names] + attr_names: names of the attributes. If None, then only the module is imported. + If it is a str or a list of single element, it will be padded to a list of same values + with same length as module_names. + return_first: If return_first is True, the function only returns the first viable module/attribute. + Returns: + imported module or attribute + + Raises: + ImportError: if the module cannot be imported or the attribute is not found. + """ + module_names = __validate_names(module_names) + attr_names = __validate_names(attr_names, pad_length=len(module_names)) + assert len(module_names) == len(attr_names) + out_list = [] + for module, attr in zip(module_names, attr_names): + try: + result = __dynamic_import(module, attr) + if return_first: + return result + out_list.append(result) + except ImportError: + continue + if len(out_list) == 0: + raise ImportError(f"Cannot Import from: {module_names}, {attr_names}") + return out_list diff --git a/histoqc/import_wrapper/openslide.py b/histoqc/import_wrapper/openslide.py index aaa007f..2e7c67c 100644 --- a/histoqc/import_wrapper/openslide.py +++ b/histoqc/import_wrapper/openslide.py @@ -1,9 +1,16 @@ +""" +For python >=3.8, the behavior of import dlls in Windows is changed. add_dll_directory is added to os and must be +manually called to include the path(s) of binaries. (in previous versions, extending the PATH variable is enough) +""" import os if hasattr(os, "add_dll_directory"): # specify your own openslide binary locations with os.add_dll_directory(os.path.join(os.getcwd(), 'bin')): + # noinspection PyUnresolvedReferences import openslide else: # os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] # #can either specify openslide bin path in PATH, or add it dynamically + # noinspection PyUnresolvedReferences import openslide + diff --git a/histoqc/import_wrapper/typing.py b/histoqc/import_wrapper/typing.py deleted file mode 100644 index 4e4ed03..0000000 --- a/histoqc/import_wrapper/typing.py +++ /dev/null @@ -1,6 +0,0 @@ -from .helper import dynamic_import -from typing import Type, Callable, Tuple, Any, TypeVar - -__TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] -Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") -get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions") diff --git a/histoqc/tests/test_pipeline_cli.py b/histoqc/tests/test_pipeline_cli.py index 244a075..2ebcf9d 100644 --- a/histoqc/tests/test_pipeline_cli.py +++ b/histoqc/tests/test_pipeline_cli.py @@ -56,6 +56,7 @@ def minimal_config(tmp_path_factory): [BaseImage.BaseImage] image_work_size = 1.25x + handles = openslide #three options: relative2mask, absolute, relative2image mask_statistics = relative2mask @@ -87,7 +88,8 @@ def test_cli_multiprocess_batching(multi_svs_dir, tmp_path, minimal_config, tmp_ '--symlink', os.fspath(spth), '*.svs' ]) == 0 - assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log', 'results_0.tsv', 'results_1.tsv']) + assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log', + 'results_0.tsv', 'results_1.tsv']) @pytest.fixture(scope='module') diff --git a/histoqc/tests/test_ui_cli.py b/histoqc/tests/test_ui_cli.py index c72aa4e..ff91739 100644 --- a/histoqc/tests/test_ui_cli.py +++ b/histoqc/tests/test_ui_cli.py @@ -1,8 +1,6 @@ import os import threading import time -from http.server import HTTPServer -from typing import Optional import pytest import requests diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py new file mode 100644 index 0000000..c5b5713 --- /dev/null +++ b/histoqc/wsi_handles/base.py @@ -0,0 +1,340 @@ +from abc import ABC, abstractmethod +from histoqc.import_wrapper import dynamic_import +import logging +from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic +import numpy as np +from PIL.Image import Image as PILImage +from typing_extensions import final +import os + +from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER + +T = TypeVar('T') +Backend = TypeVar('Backend') +ARRAY = TypeVar('ARRAY') + + +class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]): + + handle: T + fname: str + + @staticmethod + def curate_shorter_edge(width, height, limit, aspect_ratio): + """Simulate the PIL.Image.Image.thumbnail approach to curate the size. + + The target size should preserve the aspect ratio. + + Args: + width: + height: + limit: + aspect_ratio: + + Returns: + + """ + if height > width: + # limit the shorter one + width = max(width, limit) + height = round(width / aspect_ratio) + else: + height = max(height, limit) + width = round(height * aspect_ratio) + return width, height + + @staticmethod + def curate_to_max_dim(width, height, max_size, aspect_ratio): + """Set the longer one of width and height to max_size while preserving the aspect ratio. + + Args: + width: + height: + max_size: + aspect_ratio: + + Returns: + width, height tuple + """ + if height > width: + height = max_size + width = round(height * aspect_ratio) + else: + width = max_size + height = round(width / aspect_ratio) + return width, height + + @property + @abstractmethod + def associated_images(self) -> Mapping[str, Backend]: + ... + + @property + @abstractmethod + def background_color(self) -> str: + ... + + @property + @abstractmethod + def bounding_box(self) -> Tuple[int, int, int, int]: + ... + + @property + @abstractmethod + def has_bounding_box(self) -> bool: + ... + + @property + @abstractmethod + def dimensions(self) -> Tuple[int, int]: + """ + + Returns: + (width, height) tuple + """ + ... + + @property + @abstractmethod + def magnification(self) -> str: + ... + + @property + @abstractmethod + def level_count(self) -> int: + ... + + @property + @abstractmethod + def level_dimensions(self) -> Sequence[Tuple[int, int]]: + ... + + @property + @abstractmethod + def level_downsamples(self) -> Sequence[float]: + ... + + @property + @abstractmethod + def vendor(self) -> str: + ... + + @property + @abstractmethod + def mpp_x(self) -> str: + ... + + @property + @abstractmethod + def mpp_y(self) -> str: + ... + + @property + @abstractmethod + def comment(self) -> str: + ... + + @abstractmethod + def get_thumbnail(self, new_dim) -> Union[ARRAY, Backend]: + ... + + @abstractmethod + def backend_rgba2rgb(self, img) -> Backend: + """Remove the alpha channel with a predefined background color blended into the image. + + Args: + img: + + Returns: + R + """ + ... + + @abstractmethod + def region_backend(self, location, level, size, **kwargs): + ... + + @staticmethod + @abstractmethod + def backend_to_pil(region: Union[Backend, ARRAY]) -> PILImage: + ... + + @staticmethod + @abstractmethod + def array_to_numpy(arr: ARRAY) -> np.ndarray: + ... + + @staticmethod + @abstractmethod + def backend_dim(region: Backend) -> Tuple[int, int]: + """ + Defines the unified interface to obtain BACKEND handle type. + Args: + region: + + Returns: + + """ + ... + + @staticmethod + @abstractmethod + def array_shape(arr: ARRAY) -> Tuple[int, int]: + ... + + @staticmethod + @abstractmethod + def backend_to_array(region: Union[Backend, ARRAY]) -> ARRAY: + ... + + def read_region(self, location, level, size, **kwargs) -> PILImage: + region = self.region_backend(location=location, level=level, size=size, **kwargs) + return self.__class__.backend_to_pil(region) + + @abstractmethod + def read_label(self) -> Backend: + ... + + @abstractmethod + def read_macro(self) -> Backend: + ... + + @classmethod + @abstractmethod + def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int]) -> ARRAY: + ... + + @abstractmethod + def get_best_level_for_downsample(self, downsample_factor: float): + ... + + def curated_best_level_for_downsample(self, downsample_factor: float) -> Tuple[int, bool]: + relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in self.level_downsamples] + level = np.where(relative_down_factors_idx)[0] + if level.size: + return level[0], True + return self.get_best_level_for_downsample(downsample_factor), False + + @staticmethod + @abstractmethod + def grid_stack(grid: List[List[ARRAY]]): + ... + + def resize_tile_downward(self, target_downsampling_factor, level, + win_size: int = 2048, **read_region_kwargs) -> List[List[ARRAY]]: + + (bx, by, bwidth, bheight) = self.bounding_box + end_x = bx + bwidth + end_y = by + bheight + + closest_downsampling_factor = self.level_downsamples[level] + + # create a new img + grid = [] + for x in range(bx, end_x, win_size): + row_piece = [] + for y in range(by, end_y, win_size): + win_width, win_height = [win_size] * 2 + # Adjust extraction size for endcut + if end_x < x + win_width: + win_width = end_x - x + if end_y < y + win_height: + win_height = end_y - y + + win_down_width = int(round(win_width / target_downsampling_factor)) + win_down_height = int(round(win_height / target_downsampling_factor)) + + win_width = int(round(win_width / closest_downsampling_factor)) + win_height = int(round(win_height / closest_downsampling_factor)) + + # TODO Note: this isn't very efficient, and if more efficiency isneeded + + # TODO cont. Separate the public interface read_region -> PIL.Image to the internal data backend + # TODO (data_from_region) + # TODO e.g., cupy is far more efficient for resize w/ interpolation and antialiasing. + closest_region = self.region_backend(location=(x, y), level=level, size=(win_width, win_height), + **read_region_kwargs) + if np.shape(closest_region)[-1] == 4: + closest_region = self.backend_rgba2rgb(closest_region) + closest_region_arr = self.__class__.backend_to_array(closest_region) + target_region = self.__class__.region_resize_arr(closest_region_arr, + (win_down_width, win_down_height)) + row_piece.append(target_region) + # row_piece = np.concatenate(row_piece, axis=0) + grid.append(row_piece) + # grid = np.concatenate(output, axis=1) + # + return self.__class__.grid_stack(grid) + + def best_thumb(self, x: int, y: int, dims: Tuple[int, int], + target_sampling_factor: float, **read_region_kwargs) -> ARRAY: + + # get thumb from og + if not self.has_bounding_box: + max_dim = dims[0] if dims[0] > dims[1] else dims[1] + return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim))) + + (level, is_exact_level) = self.curated_best_level_for_downsample(target_sampling_factor) + + # check if to get the existing level + if is_exact_level: + backend: Backend = self.read_region((x, y), level, dims) + return self.__class__.backend_to_array(self.backend_rgba2rgb(backend)) \ + if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend) + # scale down the thumb img from the next high level + else: + return self.resize_tile_downward(target_sampling_factor, level, win_size=2048, **read_region_kwargs) + + @staticmethod + def parse_wsi_handles(handle_list: str | List[str], delimiter: str, + wsi_handle_dict: Dict[str, Tuple[str, str]]) -> Tuple[List[str], List[str]]: + if isinstance(handle_list, str): + handle_list = handle_list.split(delimiter) + module_list = [] + attr_list = [] + for handle_type in handle_list: + handle_type = handle_type.strip() + if handle_type not in wsi_handle_dict: + msg = f"WSIImageHandle: \"{handle_type}\" is not a registered handle" + logging.warning(msg) + continue + result = wsi_handle_dict[handle_type] + assert len(result) == 2, f"{result}" + module, attr = wsi_handle_dict[handle_type] + module_list.append(module) + attr_list.append(attr) + return module_list, attr_list + + @classmethod + def __create_handle(cls, fname: str, + handle_class_list: List[Callable[[str], "WSIImageHandle"]]) -> "WSIImageHandle": + image_handle = None + assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file" + for handle_class in handle_class_list: + # noinspection PyBroadException + try: + image_handle = handle_class(fname) + except Exception: + # current wsi handle class doesn't support this file + msg = f"WSIImageHandle: \"{handle_class}\" doesn't support {fname}" + logging.warning(msg) + continue + if image_handle is None: + # error: no handles support this file + msg = f"WSIImageHandle: can't find the support wsi handles - {fname}" + logging.error(msg) + raise NotImplementedError(msg) + return image_handle + + @classmethod + @final + def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle": + # get handles list + module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES) + handle_class_list = dynamic_import(module_list, attr_list, return_first=False) + image_handle = cls.__create_handle(fname, handle_class_list) + return image_handle + + def __init__(self, fname: str): + self.fname = fname + diff --git a/histoqc/wsi_handles/constants.py b/histoqc/wsi_handles/constants.py new file mode 100644 index 0000000..16a297b --- /dev/null +++ b/histoqc/wsi_handles/constants.py @@ -0,0 +1,20 @@ +from typing import Dict, Tuple +from typing_extensions import Literal, get_args + +TYPE_OPENSLIDE = Literal["openslide"] +KEY_OPENSLIDE: str = get_args(TYPE_OPENSLIDE)[0] +MODULE_OPENSLIDE: str = "histoqc.wsi_handles.openslide_handle" +CLASS_OPENSLIDE: str = "OpenSlideHandle" + +TYPE_CUCIM = Literal["cucim"] +KEY_CUCIM: str = get_args(TYPE_CUCIM)[0] +MODULE_CUCIM: str = "histoqc.wsi_handles.cuimage_handle" +CLASS_CUCIM: str = "CuImageHandle" + +WSI_HANDLES: Dict[str, Tuple[str, str]] = { + KEY_OPENSLIDE: (MODULE_OPENSLIDE, CLASS_OPENSLIDE), + # todo: add unified interface + # KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM), +} + +HANDLE_DELIMITER = ',' diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py new file mode 100644 index 0000000..e37172a --- /dev/null +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -0,0 +1,171 @@ +from __future__ import annotations +from PIL.Image import Image as PILImage +from cucim.clara import CuImage +from .base import WSIImageHandle +from PIL import Image +from ..import_wrapper.openslide import openslide +import cupy as cp +from typing import List, Union, Tuple, Mapping +from typing import cast +from lazy_property import LazyProperty +import numpy as np +from cucim import skimage as c_skimage + + +class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): + + handle: CuImage + fname: str + + # TODO: standalone parser of vendor information + dummy_handle: openslide.OpenSlide + + def backend_rgba2rgb(self, img: CuImage) -> CuImage: + # todo: it appears that CuImage does not take care of the alpha channel at all. + return img + + @classmethod + def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int]) -> cp.ndarray: + w, h, *_ = new_size_wh + arr = cp.array(data) + return c_skimage.transform.resize(arr, output_shape=(h, w)) + + def __init__(self, fname: str): + super().__init__(fname) + self.handle = CuImage(fname) + # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a + # todo - native unified metadata interface for different vendors. + self.dummy_handle = openslide.OpenSlide(fname) + + @LazyProperty + def background_color(self): + return f"#{self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}" + + @LazyProperty + def bounding_box(self): + dim_width, dim_height = self.dimensions + x = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0)) + y = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0)) + width = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width)) + height = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height)) + return x, y, width, height + + @LazyProperty + def has_bounding_box(self): + return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties + and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties + and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle.properties + and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle.properties + ) + + @LazyProperty + def dimensions(self): + return tuple(self.handle.metadata['cucim']['shape'][:2][::-1]) + + @LazyProperty + def magnification(self): + return self.dummy_handle.properties.get("openslide.objective-power") or \ + self.dummy_handle.properties.get("aperio.AppMag") + + @property + def level_count(self): + return self.handle.metadata['cucim']['resolutions']['level_count'] + + @property + def level_dimensions(self): + return self.handle.metadata['cucim']['resolutions']['level_dimensions'] + + @property + def level_downsamples(self): + return self.handle.metadata['cucim']['resolutions']['level_downsamples'] + + @property + def vendor(self): + return self.dummy_handle.properties.get("openslide.vendor", "NA") + + @property + def mpp_x(self): + return self.dummy_handle.properties.get("openslide.mpp-x", "NA") + + @property + def mpp_y(self): + return self.dummy_handle.properties.get("openslide.mpp-y", "NA") + + @property + def comment(self): + return self.dummy_handle.properties.get("openslide.comment", "NA") + + def get_thumbnail(self, new_dim): + """Get thumbnail + + Args: + new_dim: Tuple + + Returns: + + """ + # from openslide + downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim))) + level = self.get_best_level_for_downsample(downsample) + thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level])) + # resize + thumb_cp = cp.array(thumb, copy=False) + target_w, target_h = (x // int(downsample) for x in self.dimensions) + aspect_ratio = self.dimensions[0] / self.dimensions[1] + + target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) + return c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w)) + + def get_best_level_for_downsample(self, down_factor: float) -> int: + """Return the largest level that's smaller than the target downsample factor, consistent with openslide. + + Args: + down_factor: + + Returns: + + """ + level_downsamples_arr = np.asarray(self.level_downsamples) + # not exceeding the current downsample level + down_indices = np.where(level_downsamples_arr <= down_factor)[0] + down_values = level_downsamples_arr[down_indices] + # find the indices of the down_indices that points to the best downsample factor value + return down_indices[down_values.argmax()] + + def region_backend(self, location, level, size, **kwargs): + return self.handle.read_region(location=location, level=level, size=size, **kwargs) + + @staticmethod + def backend_to_array(region: Union[CuImage, cp.ndarray]) -> cp.ndarray: + return cp.array(region, copy=False) + + @staticmethod + def array_to_numpy(arr: cp.ndarray) -> np.ndarray: + return arr.get() + + @classmethod + def backend_to_pil(cls, region: CuImage) -> PILImage: + return Image.fromarray(cls.backend_to_array(region).get()) + + def read_label(self) -> CuImage: + return self.handle.associated_image("label") + + def read_macro(self) -> CuImage: + return self.handle.associated_image("macro") + + @LazyProperty + def associated_images(self) -> Mapping: + keys = self.handle.associated_images + return {k: self.handle.associated_image(k) for k in keys} + + @staticmethod + def grid_stack(grid: List[List[cp.ndarray]]): + return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1) + + @staticmethod + def backend_dim(region: CuImage) -> Tuple[int, int]: + return cast(Tuple[int, int], tuple(region.size()[:2])) + + @staticmethod + def array_shape(arr: cp.ndarray) -> Tuple[int, ...]: + return arr.shape diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py new file mode 100644 index 0000000..6388329 --- /dev/null +++ b/histoqc/wsi_handles/openslide_handle.py @@ -0,0 +1,145 @@ +import PIL.Image +import numpy as np + +from .base import WSIImageHandle +from histoqc.import_wrapper.openslide import openslide +from typing import Union, Tuple, Sequence, List, Mapping +from typing import cast +from PIL.Image import Image as PILImage +from .utils import rgba2rgb_pil +from PIL import Image + + +class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]): + _background_color: str + _magnification_factor: str + _has_bounding_box: bool + fname: str + handle: openslide.OpenSlide + + def backend_rgba2rgb(self, img) -> PILImage: + return rgba2rgb_pil(img, self.background_color) + + def __init__(self, fname): + super().__init__(fname) + self.handle = openslide.OpenSlide(fname) + self._has_bounding_box = True + self._bounding_box = self.__get_bounding_box() + + # get magnification factor from wsi slide + self._magnification_factor = self.handle.properties.get("openslide.objective-power") or \ + self.handle.properties.get("aperio.AppMag") + + # get background color + self._background_color = f"#{self.handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}" + + def __get_bounding_box(self) -> Tuple[int, int, int, int]: + (dim_width, dim_height) = self.handle.dimensions + + try: + x = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA')) + y = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA')) + width = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA')) + height = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA')) + return x, y, width, height + # if any attribute is 'NA' and fails the int() cast + except ValueError: + self._has_bounding_box = False + return 0, 0, dim_width, dim_height + + @property + def background_color(self): + return self._background_color + + @property + def has_bounding_box(self) -> bool: + return self._has_bounding_box + + @property + def bounding_box(self) -> Tuple[int, int, int, int]: + return self._bounding_box + + @property + def dimensions(self) -> Tuple[int, int]: + return self.handle.dimensions + + @property + def magnification(self) -> Union[str, None]: + return self._magnification_factor + + @property + def level_count(self) -> int: + return self.handle.level_count + + @property + def level_dimensions(self) -> Sequence[Tuple[int, int]]: + return self.handle.level_dimensions + + @property + def level_downsamples(self): + return self.handle.level_downsamples + + @property + def vendor(self): + return self.handle.properties.get("openslide.vendor", "NA") + + @property + def mpp_x(self) -> str: + return self.handle.properties.get("openslide.mpp-x", "NA") + + @property + def mpp_y(self) -> str: + return self.handle.properties.get("openslide.mpp-y", "NA") + + @property + def comment(self) -> str: + return self.handle.properties.get("openslide.comment", "NA") + + @classmethod + def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int]): + return np.array(Image.fromarray(data).resize(new_size_wh), copy=False) + + def get_thumbnail(self, new_dim): + return self.handle.get_thumbnail(new_dim) + + def get_best_level_for_downsample(self, down_factor): + return self.handle.get_best_level_for_downsample(down_factor) + + def region_backend(self, location, level, size, **kwargs): + return self.handle.read_region(location, level, size) + + @staticmethod + def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage: + if isinstance(region, np.ndarray): + return PIL.Image.fromarray(region) + return region + + @staticmethod + def backend_to_array(region: PILImage) -> np.ndarray: + return np.array(region) + + @staticmethod + def array_to_numpy(arr: np.ndarray) -> np.ndarray: + return np.array(arr) + + def read_label(self): + return self.handle.associated_images["label"] + + def read_macro(self): + return self.handle.associated_images["macro"] + + @property + def associated_images(self) -> Mapping[str, PILImage]: + return self.handle.associated_images + + @staticmethod + def grid_stack(grid: List[List[np.ndarray]]): + return np.concatenate([np.concatenate(row, axis=0) for row in grid], axis=1) + + @staticmethod + def backend_dim(region: PILImage) -> Tuple[int, int]: + return cast(Tuple[int, int], region.size) + + @staticmethod + def array_shape(arr: np.ndarray) -> Tuple[int, ...]: + return arr.shape diff --git a/histoqc/wsi_handles/utils.py b/histoqc/wsi_handles/utils.py new file mode 100644 index 0000000..fcceafc --- /dev/null +++ b/histoqc/wsi_handles/utils.py @@ -0,0 +1,38 @@ +from PIL import Image +from PIL.Image import Image as PILImage +from typing import Union, Iterable + + +def hex_to_rgb(hex_color: str): + if hex_color.startswith('#'): + hex_color = hex_color[1:] + + if len(hex_color) != 6: + raise ValueError(f"Invalid hex triplets. Length: {len(hex_color)}") + + rgb_color = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) + return rgb_color + + +def _validate_numerics(data: Iterable[float]): + if not isinstance(data, Iterable): + return False + return all([isinstance(x, float) for x in data]) + + +def validate_color(background_color: Union[str, Iterable[float], float]): + # if str -> assume a hex triplet + if isinstance(background_color, str): + return hex_to_rgb(background_color) + # must be numeric, or sequence of numeric + if isinstance(background_color, float): + return background_color + assert _validate_numerics(background_color), (f"background color must be a hex triplet string, a number," + f" or a sequence of numbers") + return tuple(x for x in background_color) + + +def rgba2rgb_pil(img: PILImage, background_color) -> PILImage: + thumb = Image.new("RGB", img.size, validate_color(background_color)) + thumb.paste(img, None, img) + return thumb diff --git a/pyproject.toml b/pyproject.toml index 7fb7892..e068ec1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,3 +24,7 @@ exclude_lines = [ "if MYPY:", "except ImportError:", ] + +[options.extras_require] +cucim = ["cupy", "cucim"] +dicom = ["wsidicom"] \ No newline at end of file diff --git a/setup.py b/setup.py index ac20fff..e6a092b 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,10 @@ "version_scheme": "post-release", }, setup_requires=['setuptools_scm'], + extras_require={ + "dicom": ["wsidicom"], + "cucim": ["cucim", "cupy"], + }, package_data={ 'histoqc.config': ['*.ini'], 'histoqc.data': data_files, From decaa815a9a8d66111c4d7563dafedff89c70800 Mon Sep 17 00:00:00 2001 From: CielAl Date: Thu, 2 May 2024 09:06:28 -0400 Subject: [PATCH 2/6] Unified CPU/GPU interface and array adaptor. Modules updated --- histoqc/AnnotationModule.py | 6 +- histoqc/BaseImage.py | 38 ++- histoqc/BasicModule.py | 32 ++- histoqc/BlurDetectionModule.py | 31 ++- histoqc/BrightContrastModule.py | 67 ++++- histoqc/BubbleRegionByRegion.py | 60 ++-- histoqc/ClassificationModule.py | 144 ++++++---- histoqc/DeconvolutionModule.py | 14 +- histoqc/HistogramModule.py | 34 ++- histoqc/LightDarkModule.py | 69 +++-- histoqc/LocalTextureEstimationModule.py | 23 +- histoqc/MorphologyModule.py | 74 ++--- histoqc/SaveModule.py | 47 ++-- histoqc/TileExtractionModule.py | 15 +- histoqc/__main__.py | 10 +- histoqc/_worker.py | 14 +- histoqc/array_adapter/__init__.py | 2 + histoqc/array_adapter/adapter.py | 316 ++++++++++++++++++++++ histoqc/array_adapter/array_api_compat.py | 4 + histoqc/array_adapter/func_mapping.py | 51 ++++ histoqc/array_adapter/implementation.py | 0 histoqc/array_adapter/typing.py | 13 + histoqc/config/config.ini | 1 + histoqc/import_wrapper/cupy_extra.py | 6 + histoqc/wsi_handles/base.py | 25 ++ histoqc/wsi_handles/constants.py | 2 +- histoqc/wsi_handles/cuimage_handle.py | 19 +- histoqc/wsi_handles/openslide_handle.py | 10 + imported_functions_list.txt | 0 29 files changed, 880 insertions(+), 247 deletions(-) create mode 100644 histoqc/array_adapter/__init__.py create mode 100644 histoqc/array_adapter/adapter.py create mode 100644 histoqc/array_adapter/array_api_compat.py create mode 100644 histoqc/array_adapter/func_mapping.py create mode 100644 histoqc/array_adapter/implementation.py create mode 100644 histoqc/array_adapter/typing.py create mode 100644 histoqc/import_wrapper/cupy_extra.py create mode 100644 imported_functions_list.txt diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py index 67414f6..b36a7fc 100644 --- a/histoqc/AnnotationModule.py +++ b/histoqc/AnnotationModule.py @@ -1,6 +1,7 @@ import logging from typing import List, Tuple from histoqc.BaseImage import printMaskHelper, BaseImage +from histoqc.array_adapter import ArrayDevice, ArrayAdapter from skimage import io from skimage.util import img_as_ubyte import os @@ -76,6 +77,8 @@ def getParams(s: BaseImage, params): def saveAnnotationMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetAnnotationMask") + # quite pointless to enforce GPU acceleration here. Force to use CPU mode + adaptor = ArrayAdapter.build(input_device=ArrayDevice.CPU, output_device=ArrayDevice.CPU) (ann_format, file_path, suffix) = getParams(s, params) @@ -107,13 +110,14 @@ def saveAnnotationMask(s: BaseImage, params): (off_x, off_y, ncol, nrow) = s["img_bbox"] resize_factor = np.shape(s["img_mask_use"])[1] / ncol height, width = s["img_mask_use"].shape + annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0 mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png" io.imsave(mask_file_name, img_as_ubyte(annotationMask)) prev_mask = s["img_mask_use"] - s["img_mask_use"] = prev_mask & annotationMask + s["img_mask_use"] = adaptor.and_(prev_mask, annotationMask) s.addToPrintList("getAnnotationMask", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index 91e1a7e..aa507b5 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -6,9 +6,10 @@ import dill from distutils.util import strtobool import re -from typing import Union, Tuple, cast +from typing import Union, Tuple, cast, Optional from histoqc.wsi_handles.base import WSIImageHandle -from histoqc.wsi_handles.constants import KEY_OPENSLIDE +from histoqc.wsi_handles.constants import KEY_OPENSLIDE, KEY_CUCIM +from histoqc.array_adapter.typing import TYPE_ARRAY _REGEX_MAG = r"^(\d?\.?\d*X?)" _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE) MAG_NA = None @@ -35,18 +36,21 @@ class BaseImage(dict): - __image_handle: WSIImageHandle + _image_handle: WSIImageHandle @property def image_handle(self) -> WSIImageHandle: - return self.__image_handle + return self._image_handle @image_handle.setter def image_handle(self, image_handle: WSIImageHandle): - self.__image_handle = image_handle + self._image_handle = image_handle def __init__(self, fname, fname_outdir, params): dict.__init__(self) + handles = params.get("handles", KEY_CUCIM) + # dynamically load wsi image handle + self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles) self.in_memory_compression = strtobool(params.get("in_memory_compression", "False")) @@ -61,9 +65,6 @@ def __init__(self, fname, fname_outdir, params): self["dir"] = os.path.dirname(fname) # get handles from config - handles = params.get("handles", KEY_OPENSLIDE) - # dynamically load wsi image handle - self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles) self["image_base_size"] = self.image_handle.dimensions self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box", "False")) @@ -95,16 +96,29 @@ def __init__(self, fname, fname_outdir, params): self["completed"] = [] + @staticmethod + def is_img_data(key: str) -> bool: + return key.startswith("img") and key != "img_bbox" + + def _sync_to_handle(self, key, value): + if not self.__class__.is_img_data(key): + return value + if hasattr(self, "_image_handle") and self.image_handle is not None: + value = self.image_handle.adapter.sync(value) + return value + def __getitem__(self, key): value = super(BaseImage, self).__getitem__(key) - if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"): + if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key): value = dill.loads(zlib.decompress(value)) + + value = self._sync_to_handle(key, value) return value def __setitem__(self, key, value): - if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"): + value = self._sync_to_handle(key, value) + if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key): value = zlib.compress(dill.dumps(value), level=5) - return super(BaseImage, self).__setitem__(key, value) # setbounding box start coordinate and size @@ -152,7 +166,7 @@ def validate_slide_size(size: str, assertion: bool = False): # for now just cast it to str return size - def getImgThumb(self, size: str): + def getImgThumb(self, size: str) -> Optional[TYPE_ARRAY]: # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't # really handle it that way, and trace of previous coding also suggest that there actually lack a params # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here. diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py index 4d4dbcd..fa90ff7 100644 --- a/histoqc/BasicModule.py +++ b/histoqc/BasicModule.py @@ -1,6 +1,7 @@ import logging import os from histoqc.BaseImage import printMaskHelper +from histoqc.array_adapter import ArrayAdapter, ArrayDevice from skimage.morphology import remove_small_objects, binary_opening, disk from skimage import io from skimage.util import img_as_ubyte @@ -27,13 +28,23 @@ def finalComputations(s: BaseImage, params): def finalProcessingSpur(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingSpur") + + adapter = s.image_handle.adapter disk_radius = int(params.get("disk_radius", "25")) - selem = disk(disk_radius) - mask = s["img_mask_use"] - mask_opened = binary_opening(mask, selem) - mask_spur = ~mask_opened & mask - io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", img_as_ubyte(mask_spur)) + # selem = adapter(disk)(disk_radius) + mask = s["img_mask_use"] + mask_opened = adapter(binary_opening)(mask, footprint=disk(disk_radius)) + # todo: it is safe to directly compare + # todo: ~mask_opened & mask directly as the device of both are synchronized by adapter. + # todo: but this assumes that an adapter is used in the module so + # todo: for now unless we implement an array proxy the best practice is to use explicit and_ method + # todo: to avoid mistakes + mask_spur = adapter.and_(~mask_opened, mask) + fname = os.path.join(s["outdir"], f"{s['filename']}_spur.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_spur)) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", ArrayAdapter.move_to_device(mask_spur_ubyte, + # ArrayDevice.CPU)) prev_mask = s["img_mask_use"] s["img_mask_use"] = mask_opened @@ -52,13 +63,16 @@ def finalProcessingSpur(s: BaseImage, params): def finalProcessingArea(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingArea") + + adapter = s.image_handle.adapter area_thresh = int(params.get("area_threshold", "1000")) mask = s["img_mask_use"] - mask_opened = remove_small_objects(mask, min_size=area_thresh) - mask_removed_area = ~mask_opened & mask - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area)) + mask_opened = adapter(remove_small_objects)(mask, min_size=area_thresh) + mask_removed_area = adapter.and_(~mask_opened, mask) + fname = os.path.join(s["outdir"], f"{s['filename']}_areathresh.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_removed_area)) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area)) prev_mask = s["img_mask_use"] s["img_mask_use"] = mask_opened > 0 diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py index 9900780..7ea4993 100644 --- a/histoqc/BlurDetectionModule.py +++ b/histoqc/BlurDetectionModule.py @@ -7,6 +7,7 @@ from skimage.util import img_as_ubyte from skimage.color import rgb2gray import numpy as np +from histoqc.array_adapter import ArrayAdapter, FUNC_MAP, ArrayDevice # Analysis of focus measure operators for shape-from-focus # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012 @@ -15,35 +16,41 @@ def identifyBlurryRegions(s: BaseImage, params): logging.info(f"{s['filename']} - \tidentifyBlurryRegions") - + adapter = s.image_handle.adapter blur_radius = int(params.get("blur_radius", 7)) blur_threshold = float(params.get("blur_threshold", .1)) - img = s.getImgThumb(params.get("image_work_size", "2.5x")) - img = rgb2gray(img) - img_laplace = np.abs(skimage.filters.laplace(img)) - mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold + img = adapter(rgb2gray)(img) + # use the __abs__ interface + img_laplace = abs(adapter(skimage.filters.laplace)(img)) + mask = adapter(skimage.filters.gaussian)(img_laplace, sigma=blur_radius) <= blur_threshold # for some reason resize takes a grayscale and produces a 3chan - mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, 1] + # Note: the reason you obtain a 3chan is that you specified a 3chan output shape + mask_resized_shape = s.getImgThumb(s["image_work_size"]).shape[:2] + mask = adapter(skimage.transform.resize)(mask, output_shape=mask_resized_shape, order=0) - mask = s["img_mask_use"] & (mask > 0) + mask = adapter.and_(s["img_mask_use"], mask > 0) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_blurry.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) s["img_mask_blurry"] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_blurry"] + s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_blurry"]) + + labeled_mask = adapter(morphology.label)(mask) + rps = adapter(measure.regionprops)(labeled_mask) - rps = measure.regionprops(morphology.label(mask)) if rps: - areas = np.asarray([rp.area for rp in rps]) + # scalar stats --> CPU is sufficient. + # use float to cast cp.array(scalar) to python's float + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() else: nobj = area_max = area_mean = 0 - s.addToPrintList("blurry_removed_num_regions", str(nobj)) s.addToPrintList("blurry_removed_mean_area", str(area_mean)) s.addToPrintList("blurry_removed_max_area", str(area_max)) diff --git a/histoqc/BrightContrastModule.py b/histoqc/BrightContrastModule.py index df98aec..a6e1ebe 100644 --- a/histoqc/BrightContrastModule.py +++ b/histoqc/BrightContrastModule.py @@ -4,9 +4,45 @@ from skimage.color import convert_colorspace, rgb2gray from distutils.util import strtobool from histoqc.BaseImage import BaseImage +from histoqc.array_adapter.typing import TYPE_ARRAY + +_EPS = np.finfo(np.float32).eps + + +def _rms(img: TYPE_ARRAY) -> float: + """Root Mean Square contrast for non-empty masked images + Args: + img: Input image or a vector yielded from masked image. Not Tissue-less + Returns: + + """ + assert img.size > 0 + err = (img - img.mean()) ** 2 + return float(np.sqrt(err.sum() / img.size)) + + +def _michelson(img: TYPE_ARRAY) -> float: + """Helper function for non-empty masked input + Args: + img: + Returns: + + """ + assert img.size > 0 + max_img = img.max() + min_img = img.min() + # add eps to avoid nan when max and min are both 0. + denominator = max_img + min_img + denominator = denominator if denominator != 0 else denominator + _EPS + return float((max_img - min_img) / denominator) + + +def _tenengrad_from_sobel2(sobel_img2: TYPE_ARRAY): + return np.sqrt(sobel_img2.sum()) / sobel_img2.size def getBrightnessGray(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix+"_" if prefix else "" logging.info(f"{s['filename']} - \tgetContrast:{prefix}") @@ -17,11 +53,11 @@ def getBrightnessGray(s: BaseImage, params): img = s.getImgThumb(s["image_work_size"]) - img_g = rgb2gray(img) + img_g = adapter(rgb2gray)(img) if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] - + mask = adapter.sync(mask) img_g = img_g[mask] if img_g.size == 0: img_g = np.array(-100) @@ -33,6 +69,7 @@ def getBrightnessGray(s: BaseImage, params): def getBrightnessByChannelinColorSpace(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -48,7 +85,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params): suffix = "" if to_color_space != "RGB": - img = convert_colorspace(img, "RGB", to_color_space) + img = adapter(convert_colorspace)(img, fromspace="RGB", tospace=to_color_space) suffix = "_" + to_color_space for chan in range(0, 3): @@ -56,6 +93,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params): if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] + mask = adapter.sync(mask) vals = vals[mask] if vals.size == 0: @@ -68,6 +106,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params): def getContrast(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -78,15 +117,16 @@ def getContrast(s: BaseImage, params): invert = strtobool(params.get("invert", "False")) img = s.getImgThumb(s["image_work_size"]) - img = rgb2gray(img) + img = adapter(rgb2gray)(img) # noinspection PyTypeChecker - sobel_img = sobel(img) ** 2 + sobel_img2 = adapter(sobel)(img) ** 2 + mask = None if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] - - sobel_img = sobel_img[mask] + img, sobel_img2, mask = adapter.device_sync_all(img, sobel_img2, mask) + sobel_img2 = sobel_img2[mask] img = img[s["img_mask_use"]] if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to @@ -104,17 +144,18 @@ def getContrast(s: BaseImage, params): return # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask - tenen_grad_contrast = np.sqrt(np.sum(sobel_img)) / img.size + + # np.sqrt(sobel_img2.sum()) / sobel_img2.size + # np.sqrt(np.sum(sobel_img)) / img.size + tenen_grad_contrast = _tenengrad_from_sobel2(sobel_img2=sobel_img2) + s.addToPrintList(f"{prefix}tenen_grad_contrast", str(tenen_grad_contrast)) # Michelson contrast - max_img = img.max() - min_img = img.min() - contrast = (max_img - min_img) / (max_img + min_img) + contrast = _michelson(img) s.addToPrintList(f"{prefix}michelson_contrast", str(contrast)) # RMS contrast - rms_contrast = np.sqrt(pow(img - img.mean(), 2).sum() / img.size) + rms_contrast = _rms(img) s.addToPrintList(f"{prefix}rms_contrast", str(rms_contrast)) - return diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py index e5d2249..fc2bf75 100644 --- a/histoqc/BubbleRegionByRegion.py +++ b/histoqc/BubbleRegionByRegion.py @@ -18,15 +18,16 @@ # WARNING: Not as robust as other modules def roiWise(s: BaseImage, params): + adapter = s.image_handle.adapter name = params.get("name", "classTask") print("\tpixelWise:\t", name, end="") - level = int(params.get("level", 1)) win_size = int(params.get("win_size", 2048)) # the size of the ROI which will be iteratively considered osh = s.image_handle dim_base = osh.level_dimensions[0] + level = min(int(params.get("level", 1)), len(osh.level_dimensions) - 1) dims = osh.level_dimensions[level] ratio_x = dim_base[0] / dims[0] # figure out the difference between desi @@ -43,63 +44,63 @@ def roiWise(s: BaseImage, params): row_piece = [] print('.', end='', flush=True) for y in range(0, dim_base[1], round(win_size * ratio_y)): - region = osh.read_region((x, y), 1, (win_size, win_size)) - region = np.array(region) - region = region[:, :, 0:3] # remove alpha channel - g = rgb2gray(region) + # todo: confirm -- the original level is hardcoded to be 1, shouldn't it be the level variable? + + region = osh.region_backend((x, y), level, (win_size, win_size)) + region = osh.backend_to_array(region)[..., :3] + g = adapter(rgb2gray)(region) # todo -- forward compatibility. Later version of frangi alters the signatures - feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges) + sigmas = frangi_scale_range + (frangi_scale_step,) + feat = adapter(frangi)(g, sigmas=sigmas, beta=frangi_beta1, + gamma=frangi_beta2, black_ridges=frangi_black_ridges) + feat = feat / 8.875854409275627e-08 - region_mask = np.bitwise_and(g < .3, feat > 5) - region_mask = remove_small_objects(region_mask, min_size=100) - # region_std = region.std(axis=2) - # region_gray = rgb2gray(region) - # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255) - # region_mask = scipy.ndimage.morphology.binary_dilation(region_mask, iterations=1) - # region_mask = resize(region_mask , (region_mask.shape[0] / 2, region_mask.shape[1] / 2)) + region_mask = adapter.and_(g < .3, feat > 5) + region_mask = adapter(remove_small_objects)(region_mask, min_size=100) row_piece.append(region_mask) + # sanity check: force to synchronize the device + row_piece = adapter.device_sync_all(*row_piece) row_piece = np.concatenate(row_piece, axis=0) - mask.append(row_piece) + mask.append(row_piece) + mask = adapter.device_sync_all(*mask) mask = np.concatenate(mask, axis=1) if params.get("area_threshold", "") != "": # forward compatibility # inplace=True is equivalent to out=mask. Therefore, it is removed in future version - mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), out=mask) + mask = adapter(remove_small_objects)(mask, min_size=int(params.get("area_threshold", "")), out=mask) s.addToPrintList(name, str(mask.mean())) - # TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask - # s.addToPrintList(name, - # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), - # prev_mask, s["img_mask_use"])) - - # .astype(np.uint8) * 255) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) - + fname = os.path.join(s["outdir"], f"{s['filename']}_BubbleBounds.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) return def detectSmoothness(s: BaseImage, params): + + adapter = s.image_handle.adapter logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") thresh = float(params.get("threshold", ".01")) kernel_size = int(params.get("kernel_size", "10")) min_object_size = int(params.get("min_object_size", "100")) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) - imf = convolve2d(img, avg, mode="same") + imf = adapter(convolve2d)(img, in2=avg, mode="same") + mask_flat = abs(imf - img) < thresh - mask_flat = remove_small_objects(mask_flat, min_size=min_object_size) - mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size) + mask_flat = adapter(remove_small_objects)(mask_flat, min_size=min_object_size) + mask_flat = ~adapter(remove_small_objects)(~mask_flat, min_size=min_object_size) prev_mask = s["img_mask_use"] s["img_mask_flat"] = mask_flat - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_flat.png") + flat_out = adapter.and_(mask_flat, prev_mask) + adapter.imsave(fname, adapter(img_as_ubyte)(flat_out)) s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] @@ -112,6 +113,5 @@ def detectSmoothness(s: BaseImage, params): f"remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " f"detectable! Downstream modules likely to be incorrect/fail") - return diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index fdbe039..b2cbb4a 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -2,7 +2,7 @@ import os import re import sys - +from histoqc.array_adapter import ArrayAdapter, ArrayDevice from ast import literal_eval as make_tuple from distutils.util import strtobool @@ -10,7 +10,7 @@ from histoqc.BaseImage import printMaskHelper, BaseImage from skimage import io from skimage.util import img_as_ubyte, img_as_bool -from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from skimage.filters import gabor, frangi, gaussian, median, laplace from skimage.color import rgb2gray from skimage.morphology import remove_small_objects, disk, dilation from skimage.feature import local_binary_pattern @@ -30,29 +30,35 @@ def pixelWise(s: BaseImage, params): thresh = float(params.get("threshold", .5)) fname = params.get("tsv_file", "") + if fname == "": logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name) sys.exit(1) model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1) - img = s.getImgThumb(s["image_work_size"]) + # todo no formal support for GNB now + # todo Possible solution: sklearn with array-api-compat and implement a wrapper into the ArrayAdaptor + # todo Also: need to rework the GaussianNB.fit interface into a wrapper. + device = s.image_handle.device + adapter = ArrayAdapter.build(input_device=device, output_device=device) + img = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU) gnb = GaussianNB() gnb.fit(model_vals[:, 1:], model_vals[:, 0]) - cal = gnb.predict_proba(img.reshape(-1, 3)) + cal = adapter(gnb.predict_proba)(img.reshape(-1, 3)) cal = cal.reshape(img.shape[0], img.shape[1], 2) mask = cal[:, :, 1] > thresh - mask = s["img_mask_use"] & (mask > 0) + mask = adapter.and_(s["img_mask_use"], mask > 0) s.addToPrintList(name, str(mask.mean())) - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, img_as_ubyte(mask)) s["img_mask_" + name] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name]) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -69,63 +75,79 @@ def pixelWise(s: BaseImage, params): # extract_patches_2d(image, patch_size, max_patches=None, random_state=None def compute_rgb(img, params): - return img + adapter = params["adapter"] + return adapter.sync(img) def compute_laplace(img, params): laplace_ksize = int(params.get("laplace_ksize", 3)) - return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] + adapter = params["adapter"] + img_gray = adapter(rgb2gray)(img) + # return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] + return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None] + def compute_lbp(img, params): lbp_radius = float(params.get("lbp_radius", 3)) lbp_points = int(params.get("lbp_points", 24)) # example sets radius * 8 lbp_method = params.get("lbp_method", "default") + # todo: currently no LBP implemented + adapter: ArrayAdapter = params['adapter'] + img_gray = adapter(rgb2gray)(img) + # return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] + return adapter(local_binary_pattern)(img_gray, P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] - return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] def compute_gaussian(img, params): + adapter = params["adapter"] gaussian_sigma = int(params.get("gaussian_sigma", 1)) - gaussian_multichan = strtobool(params.get("gaussian_multichan", False)) - + gaussian_multichan = strtobool(params.get("gaussian_multichan", "False")) # todo: forward compatibility # todo: after 0.19 default multichannel behavior is fixed and explicitly setting channel_axis is preferred. # todo: multichannel is also deprecated in later versions if gaussian_multichan: - return gaussian(img, sigma=gaussian_sigma, channel_axis=-1) + return adapter(gaussian)(img, sigma=gaussian_sigma, channel_axis=-1) else: - return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None] + img_gray = adapter(rgb2gray)(img) + return adapter(gaussian)(img_gray, sigma=gaussian_sigma)[:, :, None] def compute_median(img, params): median_disk_size = int(params.get("median_disk_size", 3)) # starting from 0.19, selem is deprecated and footprint is preferred. - return median(rgb2gray(img), footprint=disk(median_disk_size))[:, :, None] + adapter: ArrayAdapter = params['adapter'] + imgg = adapter(rgb2gray)(img) + footprint = adapter(disk)(median_disk_size) + return adapter(median)(imgg, footprint=footprint)[:, :, None] def compute_gabor(img, params): - if not params["shared_dict"].get("gabor_kernels", False): - gabor_theta = int(params.get("gabor_theta", 4)) - gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)")) - gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)")) - - kernels = [] - for theta in range(gabor_theta): - theta = theta / 4. * np.pi - for sigma in gabor_sigma: - for frequency in gabor_frequency: - kernel = np.real(gabor_kernel(frequency, theta=theta, - sigma_x=sigma, sigma_y=sigma)) - kernels.append(kernel) - params["shared_dict"]["gabor_kernels"] = kernels - - kernels = params["shared_dict"]["gabor_kernels"] - imgg = rgb2gray(img) - feats = np.zeros((imgg.shape[0], imgg.shape[1], len(kernels)), dtype=np.double) - for k, kernel in enumerate(kernels): - filtered = ndi.convolve(imgg, kernel, mode='wrap') - feats[:, :, k] = filtered + adapter = params["adapter"] + # if not params["shared_dict"].get("gabor_kernels", False): + # todo: the benefit of caching the gabor_kernel is marginal as the computational head to obtain the kernel itself + # todo: is neglectable + gabor_theta = int(params.get("gabor_theta", 4)) + gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)")) + gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)")) + + # kernels = [] + fts = [] + for theta in range(gabor_theta): + theta = theta / 4. * np.pi + for sigma in gabor_sigma: + for frequency in gabor_frequency: + + fts.append((frequency, theta, sigma)) + + imgg = adapter(rgb2gray)(img) + feats = np.zeros((imgg.shape[0], imgg.shape[1], len(fts)), dtype=np.double) + feats = adapter.sync(feats) + + for idx, (freq, tht, sig) in enumerate(fts): + filtered, _ = adapter(gabor)(imgg, theta=tht, sigma_x=sig, sigma_y=sig, frequency=freq, mode='wrap') + feats[:, :, idx] = filtered return feats @@ -136,22 +158,31 @@ def compute_frangi(img, params): frangi_beta2 = float(params.get("frangi_beta2", 15)) frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True")) sigmas = frangi_scale_range + (frangi_scale_step,) - feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, black_ridges=frangi_black_ridges) + + adapter: ArrayAdapter = params["adapter"] + img_gray = adapter(rgb2gray)(img) + feat = adapter(frangi)(img_gray, sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, + black_ridges=frangi_black_ridges) + # feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, + # black_ridges=frangi_black_ridges) return feat[:, :, None] # add singleton dimension def compute_features(img, params): features = params.get("features", "") - + adapter = params["adapter"] feats = [] for feature in features.splitlines(): func = getattr(sys.modules[__name__], f"compute_{feature}") feats.append(func(img, params)) - + feats = adapter.device_sync_all(*feats) + # cupy can be implicitly concatenated using np's API return np.concatenate(feats, axis=2) def byExampleWithFeatures(s: BaseImage, params): + device = s.image_handle.device + adapter = ArrayAdapter.build(input_device=device, output_device=device) name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}") @@ -167,13 +198,16 @@ def byExampleWithFeatures(s: BaseImage, params): logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!") sys.exit(1) + adapter = s.image_handle.adapter + params['adapter'] = adapter with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model # then it is shared with all other modules if not params["shared_dict"].get("model_" + name, False): + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}") model_vals = [] - model_labels = np.empty([0, 1]) + model_labels = adapter.sync(np.empty([0, 1])) for ex in params["examples"].splitlines(): ex = re.split(r'(? thresh area_thresh = int(params.get("area_threshold", "5")) if area_thresh > 0: # inplace=True is redundant and deprecated. - mask = remove_small_objects(mask, min_size=area_thresh, out=mask) + mask = adapter(remove_small_objects)(mask, min_size=area_thresh, out=mask) dilate_kernel_size = int(params.get("dilate_kernel_size", "0")) if dilate_kernel_size > 0: - mask = dilation(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size))) + mask = adapter(dilation)(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size))) mask = s["img_mask_use"] & (mask > 0) + # mask_ubyte = adapter.move_to_device(adapter(img_as_ubyte)(mask), device=ArrayDevice.CPU) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", mask_ubyte) + + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) s["img_mask_" + name] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + + # todo: now the BaseImage will explicitly set/get img_* keys (except bbox) to the corresponding img handle device + # todo: however I think it could be better to also explicitly let the adapter to handle all binary operations. + s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name]) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -245,5 +290,4 @@ def byExampleWithFeatures(s: BaseImage, params): s["img_mask_force"].append("img_mask_" + name) s["completed"].append(f"byExampleWithFeatures:{name}") - return diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py index bff440b..f3b4613 100644 --- a/histoqc/DeconvolutionModule.py +++ b/histoqc/DeconvolutionModule.py @@ -12,6 +12,7 @@ def separateStains(s: BaseImage, params): logging.info(f"{s['filename']} - \tseparateStains") + adapter = s.image_handle.adapter stain = params.get("stain", "") use_mask = strtobool(params.get("use_mask", "True")) @@ -25,13 +26,15 @@ def separateStains(s: BaseImage, params): logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") sys.exit(1) - mask = s["img_mask_use"] + adapter.sync(stain_matrix) + mask = adapter.sync(s["img_mask_use"]) if use_mask and len(mask.nonzero()[0]) == 0: # -- lets just error check at the top if mask is empty and abort early for c in range(3): s.addToPrintList(f"deconv_c{c}_std", str(-100)) s.addToPrintList(f"deconv_c{c}_mean", str(-100)) - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) + fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png") + adapter.imsave(fname, img_as_ubyte(np.zeros(mask.shape))) logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " f"remains detectable! Saving Black images") @@ -41,11 +44,10 @@ def separateStains(s: BaseImage, params): return img = s.getImgThumb(s["image_work_size"]) - dimg = separate_stains(img, stain_matrix) + dimg = adapter(separate_stains)(img, conv_matrix=stain_matrix) for c in range(0, 3): dc = dimg[:, :, c] - clip_max_val = np.quantile(dc.flatten(), .99) dc = np.clip(dc, a_min=0, a_max=clip_max_val) @@ -65,6 +67,6 @@ def separateStains(s: BaseImage, params): s.addToPrintList(f"deconv_c{c}_std", str(dc.std())) dc = (dc - dc_min) / float(dc_max - dc_min) * mask - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc)) - + fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(dc)) return diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py index a57122f..901beeb 100644 --- a/histoqc/HistogramModule.py +++ b/histoqc/HistogramModule.py @@ -5,18 +5,27 @@ import matplotlib.pyplot as plt from distutils.util import strtobool from histoqc.BaseImage import BaseImage +from typing import Union +from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.array_adapter.typing import TYPE_ARRAY +# todo: beware that because there is no lock, it is likely that each worker will compute the template of their own. # this holds a local copy of the histograms of the template images so that they need only be computed once global_holder = {} def getHistogram(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetHistogram") + adapter = s.image_handle.adapter limit_to_mask = strtobool(params.get("limit_to_mask", True)) bins = int(params.get("bins", 20)) img = s.getImgThumb(s["image_work_size"]) + tissue_mask = s["img_mask_use"] + # matplotlib --> pointless to use GPU here even if a corresponding API exists + img = adapter.move_to_device(img, ArrayDevice.CPU) + tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU) if limit_to_mask: - img = img[s["img_mask_use"]] + img = img[tissue_mask] else: img = img.reshape(-1, 3) @@ -27,32 +36,38 @@ def getHistogram(s: BaseImage, params): ax.set_title('Color Distribution for ' + s["filename"]) ax.set_xlabel('Pixel Val') ax.set_ylabel('Density') - plt.savefig(s["outdir"] + os.sep + s["filename"] + "_hist.png") + fname = os.path.join(s["outdir"], f"{s['filename']}_hist.png") + plt.savefig(fname) plt.close() return -def computeHistogram(img, bins, mask=-1): +def computeHistogram(img: TYPE_ARRAY, bins: int, + adapter: ArrayAdapter, mask: Union[TYPE_ARRAY, int] = -1) -> TYPE_ARRAY: result = np.zeros(shape=(bins, 3)) + img, mask = adapter.device_sync_all(img, mask) + result = adapter.sync(result) for chan in range(0, 3): vals = img[:, :, chan].flatten() - if isinstance(mask, np.ndarray): + if ArrayAdapter.is_array(mask): + vals = vals[mask.flatten()] result[:, chan] = np.histogram(vals, bins=bins, density=True, range=(0, 255))[0] - return result def compareToTemplates(s: BaseImage, params): logging.info(f"{s['filename']} - \tcompareToTemplates") + adapter = s.image_handle.adapter bins = int(params.get("bins", 20)) limit_to_mask = strtobool(params.get("limit_to_mask", True)) # if the histograms haven't already been computed, compute and store them now if not global_holder.get("templates", False): templates = {} for template in params["templates"].splitlines(): - templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins) + templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), + bins, adapter) # compute each of their histograms global_holder["templates"] = templates @@ -70,11 +85,12 @@ def compareToTemplates(s: BaseImage, params): imghst = np.zeros((bins, 3)) else: - imghst = computeHistogram(img, bins, mask) + imghst = computeHistogram(img, bins, adapter, mask) else: - imghst = computeHistogram(img, bins) + imghst = computeHistogram(img, bins, adapter) for template in global_holder["templates"]: - val = np.sum(pow(abs(global_holder["templates"][template] - imghst), 2)) + hist_diff = adapter.sub(global_holder["templates"][template], imghst) + val = (abs(hist_diff) ** 2).sum() s.addToPrintList(template + "_MSE_hist", str(val)) return diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py index cf3290a..4fe8a8d 100644 --- a/histoqc/LightDarkModule.py +++ b/histoqc/LightDarkModule.py @@ -3,6 +3,7 @@ import numpy as np from histoqc.BaseImage import printMaskHelper, BaseImage from skimage import io, color +from histoqc.array_adapter import ArrayAdapter, ArrayDevice from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage.filters import threshold_otsu, rank @@ -12,27 +13,28 @@ def getIntensityThresholdOtsu(s: BaseImage, params): + logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu") + adapter = s.image_handle.adapter name = params.get("name", "classTask") local = strtobool(params.get("local", "False")) radius = int(params.get("radius", 15)) - selem = disk(radius) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) if local: - thresh = rank.otsu(img, selem) + thresh = adapter(rank.otsu)(img, footprint=disk(radius)) else: - thresh = threshold_otsu(img) + thresh = adapter(threshold_otsu)(img) region_below_thresh = img < thresh s["img_mask_" + name] = region_below_thresh > 0 if strtobool(params.get("invert", "False")): s["img_mask_" + name] = ~s["img_mask_" + name] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(s["img_mask_" + name])) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_" + name])) prev_mask = s["img_mask_use"] s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] @@ -50,6 +52,8 @@ def getIntensityThresholdOtsu(s: BaseImage, params): def getIntensityThresholdPercent(s: BaseImage, params): + + adapter = s.image_handle.adapter name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}") lower_thresh = float(params.get("lower_threshold", "-inf")) @@ -66,9 +70,8 @@ def getIntensityThresholdPercent(s: BaseImage, params): map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh) - region_between_interval = np.bitwise_and(region_between_interval, map_std) s["img_mask_" + name] = region_between_interval > 0 @@ -77,10 +80,11 @@ def getIntensityThresholdPercent(s: BaseImage, params): s["img_mask_" + name] = ~s["img_mask_" + name] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", - img_as_ubyte(prev_mask & ~s["img_mask_" + name])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_" + name]) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + mask_out = adapter.and_(prev_mask, ~s["img_mask_" + name]) + mask_out = adapter(img_as_ubyte)(mask_out) + adapter.imsave(fname, mask_out) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -95,6 +99,7 @@ def getIntensityThresholdPercent(s: BaseImage, params): def removeBrightestPixels(s: BaseImage, params): + adapter = s.image_handle.adapter logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels") # lower_thresh = float(params.get("lower_threshold", -float("inf"))) @@ -104,9 +109,10 @@ def removeBrightestPixels(s: BaseImage, params): # upper_var = float(params.get("upper_variance", float("inf"))) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) - kmeans = KMeans(n_clusters=3, n_init=1).fit(img.reshape([-1, 1])) + kmc = KMeans(n_clusters=3, n_init=1) + kmeans = adapter(kmc.fit)(img.reshape([-1, 1])) # noinspection PyUnresolvedReferences brightest_cluster = np.argmax(kmeans.cluster_centers_) # noinspection PyUnresolvedReferences @@ -118,9 +124,11 @@ def removeBrightestPixels(s: BaseImage, params): s["img_mask_bright"] = ~s["img_mask_bright"] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"]) + fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png.png") + bright_out = adapter.and_(prev_mask, ~s["img_mask_bright"]) + bright_out = adapter(img_as_ubyte)(bright_out) + adapter.imsave(fname, bright_out) s.addToPrintList("brightestPixels", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -136,24 +144,26 @@ def removeBrightestPixels(s: BaseImage, params): def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering") + adapter = s.image_handle.adapter disk_size = int(params.get("disk_size", 10000)) threshold = int(params.get("upper_threshold", 200)) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - img = (img * 255).astype(np.uint8) - selem = disk(disk_size) + img = adapter(color.rgb2gray)(img) + img = adapter(img_as_ubyte)(img) + # note - for uint type, CPU's rank.minimum is >>> faster than GPU's erosion + imgfilt = adapter(rank.minimum)(img, footprint=disk(disk_size)) - imgfilt = rank.minimum(img, selem) s["img_mask_bright"] = imgfilt > threshold if strtobool(params.get("invert", "True")): s["img_mask_bright"] = ~s["img_mask_bright"] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"]) + fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png") + mask_out = adapter.and_(prev_mask, ~s["img_mask_bright"]) + adapter.imsave(fname, adapter(img_as_ubyte)(mask_out)) s.addToPrintList("brightestPixels", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -169,11 +179,12 @@ def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params): def saveEqualisedImage(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage") - + adapter = s.image_handle.adapter img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - - out = exposure.equalize_hist((img*255).astype(np.uint8)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", img_as_ubyte(out)) + img = adapter(color.rgb2gray)(img) + img_u8 = adapter(img_as_ubyte)(img) + out = adapter(exposure.equalize_hist)(img_u8) + out_u8 = adapter.move_to_device(adapter(img_as_ubyte)(out), ArrayDevice.CPU) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", out_u8) return diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py index 610141c..2e28762 100644 --- a/histoqc/LocalTextureEstimationModule.py +++ b/histoqc/LocalTextureEstimationModule.py @@ -3,12 +3,14 @@ from skimage import color from distutils.util import strtobool from skimage.feature import graycomatrix, graycoprops +from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.BaseImage import BaseImage -def estimateGreyComatrixFeatures(s, params): +def estimateGreyComatrixFeatures(s: BaseImage, params): prefix = params.get("prefix", None) prefix = prefix+"_" if prefix else "" - + adapter = s.image_handle.adapter logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}") patch_size = int(params.get("patch_size", 32)) npatches = int(params.get("npatches", 100)) @@ -18,7 +20,7 @@ def estimateGreyComatrixFeatures(s, params): mask_name = params.get("mask_name", "img_mask_use") img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) mask = s[mask_name] if not invert else ~s[mask_name] if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask @@ -29,20 +31,23 @@ def estimateGreyComatrixFeatures(s, params): return maskidx = mask.nonzero() - maskidx = np.asarray(maskidx).transpose() + maskidx = ArrayAdapter.new_array(maskidx, array_device=ArrayDevice.CPU).transpose() idx = np.random.choice(maskidx.shape[0], npatches) results = [] - for index in idx: r, c = maskidx[index, :] + patch = img[r:r + patch_size, c:c + patch_size] - glcm = graycomatrix(np.digitize(patch, np.linspace(0, 1, num=nlevels), right=True), distances=[5], - angles=[0], levels=nlevels, symmetric=True, normed=True) - results.append([graycoprops(glcm, prop=feat) for feat in feats]) + image = adapter(np.digitize)(patch, bins=np.linspace(0, 1, num=nlevels), right=True) + glcm = adapter(graycomatrix)(image, distances=[5], + angles=[0], levels=nlevels, symmetric=True, normed=True) + haralick_feats = [adapter(graycoprops)(glcm, prop=feat) for feat in feats] + haralick_feats = adapter.device_sync_all(*haralick_feats) + results.append(haralick_feats) - results = np.asarray(results).squeeze() + results = adapter.asarray(results).squeeze() for vals, feat in zip(results.transpose(), feats): s.addToPrintList(f"{prefix}{feat}", str(vals.mean())) diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py index d660cfe..4d2c301 100644 --- a/histoqc/MorphologyModule.py +++ b/histoqc/MorphologyModule.py @@ -1,28 +1,32 @@ import logging import os import numpy as np -from histoqc.BaseImage import printMaskHelper +from histoqc.BaseImage import printMaskHelper, BaseImage +from histoqc.array_adapter import ArrayAdapter +from histoqc.array_adapter.typing import TYPE_ARRAY from skimage import io, morphology, measure from skimage.util import img_as_ubyte from scipy import ndimage as ndi from typing import cast -def removeSmallObjects(s, params): +def removeSmallObjects(s: BaseImage, params): logging.info(f"{s['filename']} - \tremoveSmallObjects") + adapter = s.image_handle.adapter min_size = int(params.get("min_size", 64)) - img_reduced = morphology.remove_small_objects(s["img_mask_use"], min_size=min_size) - img_small = np.invert(img_reduced) & s["img_mask_use"] + img_reduced = adapter(morphology.remove_small_objects)(s["img_mask_use"], min_size=min_size) + img_small = adapter.and_(~img_reduced, s["img_mask_use"]) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_remove.png", img_as_ubyte(img_small)) + fname = os.path.join(s["outdir"], f"{s['filename']}_small_remove.png") + adapter.imsave(fname, adapter(img_as_ubyte)(img_small)) s["img_mask_small_filled"] = (img_small * 255) > 0 prev_mask = s["img_mask_use"] s["img_mask_use"] = img_reduced - - rps = measure.regionprops(morphology.label(img_small)) + label_small = adapter(morphology.label)(img_small) + rps = adapter(measure.regionprops)(label_small) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() @@ -41,17 +45,17 @@ def removeSmallObjects(s, params): f"remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append(f"After MorphologyModule.removeSmallObjects: NO tissue remains " f"detectable! Downstream modules likely to be incorrect/fail") - return -def remove_large_objects(img, max_size): +def remove_large_objects(img: TYPE_ARRAY, max_size: int, adapter: ArrayAdapter): # code taken from morphology.remove_small_holes, except switched < with > - selem = ndi.generate_binary_structure(img.ndim, 1) - ccs = np.zeros_like(img, dtype=np.int32) - ndi.label(img, selem, output=ccs) - component_sizes: np.ndarray = np.bincount(ccs.ravel()) - too_big: np.ndarray = cast(np.ndarray, component_sizes > max_size) + # selem = adapter(ndi.generate_binary_structure)(img.ndim, connectivity=1) + # equivalent to ndi.label in binary case. + img = adapter.sync(img) + ccs = adapter(measure.label)(img, connectivity=1) + component_sizes: TYPE_ARRAY = adapter.sync(np.bincount(ccs.ravel())) + too_big: TYPE_ARRAY = component_sizes > max_size too_big_mask = too_big[ccs] img_out = img.copy() img_out[too_big_mask] = 0 @@ -60,34 +64,37 @@ def remove_large_objects(img, max_size): def removeFatlikeTissue(s, params): logging.info(f"{s['filename']} - \tremoveFatlikeTissue") + adapter = s.image_handle.adapter fat_cell_size = int(params.get("fat_cell_size", 64)) kernel_size = int(params.get("kernel_size", 3)) max_keep_size = int(params.get("max_keep_size", 1000)) - img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=fat_cell_size) - img_small = img_reduced & np.invert(s["img_mask_use"]) - img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9) + img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=fat_cell_size) + img_small = adapter.and_(img_reduced, ~s["img_mask_use"]) + img_small = ~adapter(morphology.remove_small_holes)(~img_small, area_threshold=9) + # binary + mask_dilate = adapter(morphology.dilation)(img_small, footprint=np.ones((kernel_size, kernel_size))) - mask_dilate = morphology.dilation(img_small, footprint=np.ones((kernel_size, kernel_size))) - mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size) + mask_dilate_removed = remove_large_objects(mask_dilate, max_size=max_keep_size, adapter=adapter) - mask_fat = mask_dilate & ~mask_dilate_removed + mask_fat = adapter.and_(mask_dilate, ~mask_dilate_removed) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_fatlike.png", img_as_ubyte(mask_fat)) + fname = os.path.join(s["outdir"], f"{s['filename']}_fatlike.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_fat)) s["img_mask_fatlike"] = (mask_fat * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = prev_mask & ~mask_fat + s["img_mask_use"] = adapter.and_(prev_mask, ~mask_fat) - rps = measure.regionprops(morphology.label(mask_fat)) + label_fat = adapter(morphology.label)(mask_fat) + rps = adapter(measure.regionprops)(label_fat) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() else: nobj = area_max = area_mean = 0 - s.addToPrintList("fatlike_tissue_removed_num_regions", str(nobj)) s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean)) s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max)) @@ -100,23 +107,26 @@ def removeFatlikeTissue(s, params): f"remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append(f"After MorphologyModule.removeFatlikeTissue: NO tissue remains " f"detectable! Downstream modules likely to be incorrect/fail") - + return def fillSmallHoles(s, params): logging.info(f"{s['filename']} - \tfillSmallHoles") + adapter = s.image_handle.adapter min_size = int(params.get("min_size", 64)) - img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size) - img_small = img_reduced & np.invert(s["img_mask_use"]) + img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=min_size) + img_small = adapter.and_(img_reduced, np.invert(s["img_mask_use"])) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small)) + fname = os.path.join(s["outdir"], f"{s['filename']}_small_fill.png") + adapter.imsave(fname, adapter(img_as_ubyte)(img_small)) s["img_mask_small_removed"] = (img_small * 255) > 0 prev_mask = s["img_mask_use"] s["img_mask_use"] = img_reduced - rps = measure.regionprops(morphology.label(img_small)) + label_small = adapter(morphology.label)(img_small) + rps = adapter(measure.regionprops)(label_small) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py index cd04be5..b1451f4 100644 --- a/histoqc/SaveModule.py +++ b/histoqc/SaveModule.py @@ -1,38 +1,43 @@ import logging import os -from skimage import io from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage import color import numpy as np from histoqc.BaseImage import BaseImage +from histoqc.array_adapter.typing import TYPE_ARRAY +from histoqc.array_adapter import ArrayAdapter -def blend2Images(img, mask): +def blend2Images(img: TYPE_ARRAY, mask: TYPE_ARRAY, adapter: ArrayAdapter): if img.ndim == 3: - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) if mask.ndim == 3: - mask = color.rgb2gray(mask) + mask = adapter(color.rgb2gray)(mask) img = img[:, :, None] * 1.0 # can't use boolean mask = mask[:, :, None] * 1.0 + # explicitly sync again to satisfy the requirement of using np.concatenate as a unified concatenate func + img, mask = adapter.device_sync_all(img, mask) out = np.concatenate((mask, img, mask), 2) return out def saveFinalMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveUsableRegion") - + adapter = s.image_handle.adapter mask = s["img_mask_use"] for mask_force in s["img_mask_force"]: + mask, s[mask_force] = adapter.device_sync_all(mask, s[mask_force]) mask[s[mask_force]] = 0 - io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_mask_use.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) if strtobool(params.get("use_mask", "True")): # should we create and save the fusion mask? img = s.getImgThumb(s["image_work_size"]) - out = blend2Images(img, mask) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out)) - + out = blend2Images(img, mask, adapter) + fname = os.path.join(s["outdir"], f"{s['filename']}_fuse.png") + adapter.imsave(fname, adapter(img_as_ubyte)(out)) return @@ -45,7 +50,7 @@ def saveAssociatedImage(s: BaseImage, key: str, dim: int): logging.warning(message) s["warnings"].append(message) return - + # get asscociated image by key associated_img = image_handle.associated_images[key] width, height = image_handle.__class__.backend_dim(associated_img) @@ -63,7 +68,9 @@ def saveAssociatedImage(s: BaseImage, key: str, dim: int): # resize the pil (RGB) associated_img = associated_img.resize(size).convert("RGB") # save the pil - associated_img.save(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png") + fname = os.path.join(s["outdir"], f"{s['filename']}_{key}.png") + associated_img.save(fname) + return def saveMacro(s, params): @@ -72,10 +79,10 @@ def saveMacro(s, params): return -def saveMask(s, params): +def saveMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveMaskUse") suffix = params.get("suffix", None) - + adapter = s.image_handle.adapter # check suffix param if not suffix: msg = f"{s['filename']} - \tPlease set the suffix for mask use." @@ -83,15 +90,21 @@ def saveMask(s, params): return # save mask - io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"])) + fname = os.path.join(s['outdir'], f"{s['filename']}_{suffix}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_use"])) + + return -def saveThumbnails(s, params): +def saveThumbnails(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveThumbnail") # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one img = s.getImgThumb(params.get("image_work_size", "1.25x")) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img) + adapter = s.image_handle.adapter + fname_thumb = os.path.join(s["outdir"], f"{s['filename']}_thumb.png") + adapter.imsave(fname_thumb, adapter(img_as_ubyte)(img)) img = s.getImgThumb(params.get("small_dim", 500)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img) + fname_small = os.path.join(s["outdir"], f"{s['filename']}_thumb_small.png") + adapter.imsave(fname_small, adapter(img_as_ubyte)(img)) return diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py index 07bf184..65d0383 100644 --- a/histoqc/TileExtractionModule.py +++ b/histoqc/TileExtractionModule.py @@ -6,6 +6,7 @@ import os import json from histoqc.BaseImage import BaseImage +from histoqc.array_adapter import ArrayDevice from typing import Callable, Dict, Any, List, Tuple, Union import numpy as np from PIL import Image, ImageDraw @@ -15,10 +16,7 @@ import logging from typing_extensions import Literal, get_args from PIL.Image import Image as PILImage -# from histoqc.import_wrapper.helper import dynamic_import -# __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] -# Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") -# get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions") + TYPE_TILE_SIZE = Literal['tile_size'] TYPE_TILE_STRIDE = Literal['tile_stride'] @@ -554,7 +552,10 @@ def valid_tile_extraction(self, def extract(s: BaseImage, params: Dict[PARAMS, Any]): + logging.info(f"{s['filename']} - \textract") + + adapter = s.image_handle.adapter with params['lock']: slide_out = s['outdir'] tile_output_dir = params.get('tile_output', os.path.join(slide_out, 'tiles')) @@ -568,9 +569,9 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]): tile_size = int(params.get('tile_size', 256)) tile_stride = int(params.get('tile_stride', 256)) tissue_thresh = float(params.get('tissue_ratio', 0.5)) - - img_use_for_tiles = s.getImgThumb(s["image_work_size"]) - mask_use_for_tiles = s['img_mask_use'] + # no added value from GPU acceleration (except for read_region) as the procedure is sequential + img_use_for_tiles = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU) + mask_use_for_tiles = adapter.move_to_device(s['img_mask_use'], ArrayDevice.CPU) image_handle = s.image_handle img_w, img_h = image_handle.dimensions diff --git a/histoqc/__main__.py b/histoqc/__main__.py index 415f479..430d33f 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -22,6 +22,7 @@ from histoqc._worker import worker_error from histoqc.config import read_config_template from histoqc.data import managed_pkg_data +from histoqc.wsi_handles.constants import KEY_CUCIM @managed_pkg_data @@ -66,7 +67,8 @@ def main(argv=None): args = parser.parse_args(argv) # --- multiprocessing and logging setup ----------------------------------- - + # todo: move config parsing above the mpm initialization and set the start method accordingly + # multiprocessing.set_start_method("spawn") setup_logging(capture_warnings=True, filter_warnings='ignore') mpm = multiprocessing.Manager() lm = MultiProcessingLogManager('histoqc', manager=mpm) @@ -83,6 +85,12 @@ def main(argv=None): lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.") config.read_string(read_config_template(args.config)) + # todo: for cuda --> must use spawn method if CUDA is enabled. + # todo: however a better memory management scheme should be tested. + # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime + # todo: at this moment we simply override the args.nprocesses + if config["BaseImage.BaseImage"].get("handles") == KEY_CUCIM: + args.nprocesses = 0 # --- provide models, pen and templates as fallbacks from package data ---- managed_pkg_data.inject_pkg_data_fallback(config) diff --git a/histoqc/_worker.py b/histoqc/_worker.py index 54af16d..ecf4e6a 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -1,7 +1,7 @@ """histoqc worker functions""" import os import shutil - +import traceback from histoqc.BaseImage import BaseImage from histoqc._pipeline import load_pipeline from histoqc._pipeline import setup_plotting_backend @@ -37,21 +37,19 @@ def worker(idx, file_name, *, log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") try: - s = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage"))) - + s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage"))) for process, process_params in process_queue: process_params["lock"] = lock process_params["shared_dict"] = shared_dict process(s, process_params) s["completed"].append(process.__name__) - except Exception as exc: # reproduce histoqc error string _oneline_doc_str = exc.__doc__.replace('\n', '') err_str = f"{exc.__class__} {_oneline_doc_str} {exc}" - + trace_string = traceback.format_exc() log_manager.logger.error( - f"{file_name} - Error analyzing file (skipping): \t {err_str}" + f"{file_name} - Error analyzing file (skipping): \t {err_str}. Traceback: {trace_string}" ) if exc.__traceback__.tb_next is not None: func_tb_obj = str(exc.__traceback__.tb_next.tb_frame.f_code) @@ -64,7 +62,9 @@ def worker(idx, file_name, *, else: # So long as the gc is triggered to delete the handle, the close is called to release the resources, # as documented in the openslide and cuimage's source code. - s.image_handle = None + # todo: should simply handle the __del__ + s.image_handle.close() + # s.image_handle.handle = None return s diff --git a/histoqc/array_adapter/__init__.py b/histoqc/array_adapter/__init__.py new file mode 100644 index 0000000..c43f502 --- /dev/null +++ b/histoqc/array_adapter/__init__.py @@ -0,0 +1,2 @@ +from .func_mapping import FUNC_MAP +from .adapter import ArrayAdapter, ArrayDevice diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py new file mode 100644 index 0000000..17c46e3 --- /dev/null +++ b/histoqc/array_adapter/adapter.py @@ -0,0 +1,316 @@ +from __future__ import annotations +from histoqc.array_adapter.typing import TYPE_NP, TYPE_CP, TYPE_ARRAY +from histoqc.array_adapter.func_mapping import FUNC_MAP +import numpy as np +from numbers import Number +from typing import Callable, Mapping, Tuple, Optional, Any, Iterable +from typing_extensions import Self, TypeGuard +from histoqc.import_wrapper.cupy_extra import cupy as cp +from enum import Enum +import logging +import functools +from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv +import skimage + + +def cupy_installed() -> bool: + try: + import cupy + return True + except ImportError: + return False + + +class ArrayDevice(Enum): + CPU: str = 'cpu' + CUDA: str = 'cuda' + + @classmethod + def from_bool(cls, on_cpu: bool): + value = 'cpu' if on_cpu else 'cuda' + return cls(value) + + @classmethod + def from_str(cls, str_val: str): + assert isinstance(str_val, str) + return cls(str_val) + + @classmethod + def build(cls, value: str | Self): + if isinstance(value, cls): + return value + if isinstance(value, str): + return cls.from_str(value) + raise TypeError(f'Unexpected type {type(value)}') + + +class ArrayAdapter(Callable): + + func_map: Mapping[Callable, Callable] + input_device: Optional[str | ArrayDevice] + output_device: Optional[str | ArrayDevice] + contingent_type: ArrayDevice + + TYPE_CONTINGENT_DEFAULT: ArrayDevice = ArrayDevice.CUDA + + @staticmethod + def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]: + return isinstance(arr, np.ndarray) + + @staticmethod + def is_cupy(arr: TYPE_CP) -> TypeGuard[TYPE_CP]: + return cupy_installed() and isinstance(arr, cp.ndarray) + + @staticmethod + def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP: + if ArrayAdapter.is_numpy(arr) or isinstance(arr, Number): + return np.array(arr, copy=copy) + assert ArrayAdapter.is_cupy(arr) + return arr.get() + + @staticmethod + def to_cupy(arr: TYPE_ARRAY | Number, copy: bool = False) -> TYPE_CP: + assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \ + f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}" + return cp.array(arr, copy=copy) + + @staticmethod + def array_device_type(arr: TYPE_ARRAY) -> ArrayDevice: + on_cpu = isinstance(arr, np.ndarray) + return ArrayDevice.from_bool(on_cpu) + + @staticmethod + def is_array(arr: TYPE_ARRAY) -> bool: + return isinstance(arr, np.ndarray) or (cupy_installed() and isinstance(arr, cp.ndarray)) + + @classmethod + def new_array(cls, arr: Any, array_device: ArrayDevice) -> TYPE_ARRAY: + if cls.is_array(arr): + return cls.move_to_device(arr, array_device, copy=False) + if isinstance(arr, Iterable): + arr = cls.curate_array_device(*arr, device=array_device, copy=False) + if array_device is array_device.CUDA: + assert cupy_installed() + return cp.asarray(arr) + elif array_device is array_device.CPU: + return np.asarray(arr) + raise TypeError(f'Unexpected device {ArrayDevice}') + + def asarray(self, arr: Any) -> TYPE_ARRAY: + return self.__class__.new_array(arr, self.output_device) + + @classmethod + def move_to_device(cls, + arr: Optional[TYPE_ARRAY], + device: Optional[ArrayDevice], copy: bool = False) -> TYPE_ARRAY: + # structural match > py3.10 + if device is None or not cls.is_array(arr): + return arr + if device is ArrayDevice.CPU: + return ArrayAdapter.to_numpy(arr, copy=copy) + elif device is ArrayDevice.CUDA: + return ArrayAdapter.to_cupy(arr, copy=copy) + raise ValueError(f"Unsupported device: {device}") + + def sync(self, arr: Optional[TYPE_ARRAY | Number], copy: bool = False) -> TYPE_ARRAY: + if not self.__class__.is_array(arr): + return arr + return self.__class__.move_to_device(arr, device=self.output_device, copy=copy) + + @classmethod + def curate_device_helper(cls, output: TYPE_ARRAY, device: Optional[ArrayDevice], copy: bool): + if output is not None and cls.is_array(output): + output = cls.move_to_device(output, device, copy=copy) + return output + + @classmethod + def curate_array_device(cls, *arrays: TYPE_ARRAY, + device: Optional[ArrayDevice], copy: bool) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + # already an array - no need to recursively unpack + if cls.is_array(arrays): + return cls.curate_device_helper(arrays, device, copy) + # not array and not iterable --> unpack + if not isinstance(arrays, Iterable): + return arrays + # only one input + if len(arrays) == 1: + return cls.curate_device_helper(arrays[0], device=device, copy=copy) + out_list = [] + for o in arrays: + out_list.append(cls.curate_device_helper(o, device=device, copy=copy)) + return tuple(out_list) + + @staticmethod + def get_api(cpu_func: Callable, + func_map: Mapping[Callable, Callable], device_type: ArrayDevice) -> Tuple[Callable, ArrayDevice]: + if device_type == ArrayDevice.CPU: + return cpu_func, ArrayDevice.CPU + mapped = func_map.get(cpu_func, None) + if mapped is not None: + return mapped, ArrayDevice.CUDA + # if not implemented + func_name = getattr(cpu_func, '__qualname__', cpu_func.__name__) + logging.info(f"{__name__}: {func_name} does not have a GPU implementation. Revert to CPU") + return cpu_func, ArrayDevice.CPU + + @classmethod + def unified_call(cls, + cpu_func: Callable, + func_map: Mapping[Callable, Callable], + input_device: Optional[str | ArrayDevice], + output_device: Optional[str | ArrayDevice], + data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: + # use input_device to override the current device, if not None + data = cls.curate_array_device(data, device=input_device, copy=False) + input_type = cls.array_device_type(data) + # attempt to fetch the op, revert to CPU if GPU impl is not available + func, func_device = cls.get_api(cpu_func, func_map, input_type) + func_in = cls.curate_array_device(data, device=func_device, copy=False) + + curated_args = cls.curate_array_device(*args, device=func_device, copy=False) + curated_kwargs = dict() + + for k, v in kwargs.items(): + curated_kwargs[k] = cls.curate_array_device(v, device=func_device, copy=False) + + output = func(func_in, *curated_args, **curated_kwargs) + # only move the output around if the output is an array + if isinstance(output, tuple): + return cls.curate_array_device(*output, device=output_device, copy=False) + return cls.curate_array_device(output, device=output_device, copy=False) + + @classmethod + def _validate_device(cls, device_type: Optional[ArrayDevice]) -> Optional[ArrayDevice]: + if device_type is None: + return None + if device_type is ArrayDevice.CPU: + return device_type + assert device_type is ArrayDevice.CUDA, f"Unsupported device_type: {device_type}" + if not cupy_installed(): + logging.info(f"Cupy is not installed. Revert to CPU") + return ArrayDevice.CPU + return device_type + + def __init__(self, + input_device: Optional[str | ArrayDevice], + output_device: Optional[str | ArrayDevice], + func_map: Mapping[Callable, Callable], + contingent_type: ArrayDevice, + ): + self.input_device = self.__class__._validate_device(input_device) + self.output_device = self.__class__._validate_device(output_device) + self.func_map = func_map + self.contingent_type = contingent_type + + @classmethod + def build(cls, + input_device: Optional[str | ArrayDevice], + output_device: Optional[str | ArrayDevice], + func_map: Mapping[Callable, Callable] = FUNC_MAP, + contingent_type: ArrayDevice = TYPE_CONTINGENT_DEFAULT): + return cls(input_device=input_device, output_device=output_device, func_map=func_map, + contingent_type=contingent_type) + + def apply(self, /, cpu_func: Callable, data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: + return self.unified_call(cpu_func, self.func_map, self.input_device, self.output_device, + data, *args, **kwargs) + + def __call__(self, cpu_func: Callable) -> Callable: + return functools.partial(self.apply, cpu_func) + + @staticmethod + def __sync_device_output_helper(*arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + assert len(arrays) > 0 + if len(arrays) == 1: + return arrays[0] + return arrays + + @classmethod + def device_sync_all_helper(cls, *arrays: TYPE_ARRAY, + array_device: Optional[ArrayDevice], + contingent_type: Optional[ArrayDevice]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + assert isinstance(arrays, tuple), f"input check. {type(arrays)} is not a tuple" + if array_device is not None: + return cls.__sync_device_output_helper(tuple(cls.move_to_device(arr, array_device) for arr in arrays)) + assert array_device is None + has_contingent_device = any(cls.array_device_type(arr) is contingent_type for arr in arrays) + if has_contingent_device: + assert contingent_type is not None + return cls.__sync_device_output_helper(cls.device_sync_all_helper(*arrays, + array_device=contingent_type, + contingent_type=None)) + return cls.__sync_device_output_helper(arrays) + + def device_sync_all(self, *arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + return self.__class__.device_sync_all_helper(*arrays, array_device=self.output_device, + contingent_type=self.contingent_type) + + @classmethod + def binary_operation(cls, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY, + input_device: Optional[ArrayDevice], + output_device: Optional[ArrayDevice], + contingent_type: Optional[ArrayDevice], + op: Callable) -> TYPE_ARRAY: + arr1, arr2 = cls.device_sync_all_helper(arr1, arr2, array_device=input_device, + contingent_type=contingent_type) + result: TYPE_ARRAY = op(arr1, arr2) + return cls.curate_array_device(result, device=output_device, copy=False) + + def and_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=and_) + + def or_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=or_) + + def add(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=add) + + def sub(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=sub) + + def mul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=mul) + + def matmul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=matmul) + + def truediv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=truediv) + + def floordiv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=floordiv) + + def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: + return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, + output_device=self.output_device, + contingent_type=self.contingent_type, + op=xor) + + @classmethod + def imsave(cls, filename: str, arr: TYPE_ARRAY): + arr = cls.curate_array_device(arr, device=ArrayDevice.CPU, copy=False) + return skimage.io.imsave(filename, arr) diff --git a/histoqc/array_adapter/array_api_compat.py b/histoqc/array_adapter/array_api_compat.py new file mode 100644 index 0000000..4527a6e --- /dev/null +++ b/histoqc/array_adapter/array_api_compat.py @@ -0,0 +1,4 @@ +try: + import array_api_compat +except ImportError: + ... diff --git a/histoqc/array_adapter/func_mapping.py b/histoqc/array_adapter/func_mapping.py new file mode 100644 index 0000000..aa55b46 --- /dev/null +++ b/histoqc/array_adapter/func_mapping.py @@ -0,0 +1,51 @@ +import scipy.ndimage +import skimage +from typing import Callable, Mapping +from scipy import ndimage as sci_ndi +import numpy as np +try: + import cupy as cp + from scipy import signal as sci_signal + from cupyx.scipy import signal as cu_signal + from cucim import skimage as cu_skimage + from cupyx.scipy import ndimage as cu_ndi + + FUNC_MAP: Mapping[Callable, Callable] = { + skimage.color.convert_colorspace: cu_skimage.color.convert_colorspace, + skimage.color.rgb2gray: cu_skimage.color.rgb2gray, + skimage.color.separate_stains: cu_skimage.color.separate_stains, + skimage.exposure.equalize_hist: cu_skimage.exposure.equalize_hist, + # not implemented + # skimage.feature.graycomatrix: cu_skimage.feature.graycomatrix, + # skimage.feature.local_binary_pattern: cu_skimage.feature.local_binary_pattern, + skimage.filters.frangi: cu_skimage.filters.frangi, + skimage.filters.gaussian: cu_skimage.filters.gaussian, + skimage.filters.gabor_kernel: cu_skimage.filters.gabor_kernel, + skimage.filters.gabor: cu_skimage.filters.gabor, + skimage.filters.laplace: cu_skimage.filters.laplace, + skimage.filters.median: cu_skimage.filters.median, + + # skimage.filters.rank.otsu: cu_skimage.filters.rank.otsu, + skimage.filters.sobel: cu_skimage.filters.sobel, + skimage.filters.threshold_otsu: cu_skimage.filters.threshold_otsu, + skimage.measure.regionprops: cu_skimage.measure.regionprops, + skimage.morphology.binary_opening: cu_skimage.morphology.binary_opening, + # the morphology.label is just an alias of the measure.label + skimage.morphology.label: cu_skimage.measure.label, + skimage.morphology.dilation: cu_skimage.morphology.dilation, + skimage.morphology.disk: cu_skimage.morphology.disk, + skimage.morphology.remove_small_holes: cu_skimage.morphology.remove_small_holes, + skimage.morphology.remove_small_objects: cu_skimage.morphology.remove_small_objects, + skimage.transform.resize: cu_skimage.transform.resize, + skimage.util.img_as_bool: cu_skimage.util.img_as_bool, + skimage.util.img_as_ubyte: cu_skimage.util.img_as_ubyte, + sci_ndi.convolve: cu_ndi.convolve, + + # can be replaced by erosion, but is actually slower for uint dtypes + skimage.filters.rank.minimum: None, # cu_skimage.morphology.erosion, + sci_signal.convolve2d: cu_signal.convolve2d, + sci_ndi.generate_binary_structure: cu_ndi.generate_binary_structure, + np.digitize: cp.digitize + } +except ImportError: + FUNC_MAP = dict() diff --git a/histoqc/array_adapter/implementation.py b/histoqc/array_adapter/implementation.py new file mode 100644 index 0000000..e69de29 diff --git a/histoqc/array_adapter/typing.py b/histoqc/array_adapter/typing.py new file mode 100644 index 0000000..900ceac --- /dev/null +++ b/histoqc/array_adapter/typing.py @@ -0,0 +1,13 @@ +from __future__ import annotations +import numpy as np +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # for forward reference only -- annotate the objects from the optional cupy dependency + # noinspection PyUnresolvedReferences + import cupy as cp + + +TYPE_NP = np.ndarray +TYPE_CP = "cp.ndarray" +TYPE_ARRAY = "np.ndarray | cp.ndarray" diff --git a/histoqc/config/config.ini b/histoqc/config/config.ini index 0ee89bd..86872de 100644 --- a/histoqc/config/config.ini +++ b/histoqc/config/config.ini @@ -26,6 +26,7 @@ steps= BasicModule.getBasicStats [BaseImage.BaseImage] image_work_size = 1.25x +handles = openslide #not yet implemented confirm_base_mag: False diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py new file mode 100644 index 0000000..c67d3e1 --- /dev/null +++ b/histoqc/import_wrapper/cupy_extra.py @@ -0,0 +1,6 @@ +try: + import cupy +except ImportError: + cupy = None +finally: + cp = cupy diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py index c5b5713..9a65a71 100644 --- a/histoqc/wsi_handles/base.py +++ b/histoqc/wsi_handles/base.py @@ -1,10 +1,13 @@ +from __future__ import annotations from abc import ABC, abstractmethod + from histoqc.import_wrapper import dynamic_import import logging from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic import numpy as np from PIL.Image import Image as PILImage from typing_extensions import final +from histoqc.array_adapter import ArrayDevice, ArrayAdapter import os from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER @@ -18,6 +21,7 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]): handle: T fname: str + _adapter: ArrayAdapter @staticmethod def curate_shorter_edge(width, height, limit, aspect_ratio): @@ -314,6 +318,7 @@ def __create_handle(cls, fname: str, # noinspection PyBroadException try: image_handle = handle_class(fname) + break except Exception: # current wsi handle class doesn't support this file msg = f"WSIImageHandle: \"{handle_class}\" doesn't support {fname}" @@ -337,4 +342,24 @@ def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle": def __init__(self, fname: str): self.fname = fname + self._adapter = ArrayAdapter.build(input_device=self.device, output_device=self.device) + + @abstractmethod + def close_handle(self): + ... + + def close(self): + self.close_handle() + self.handle = None + def is_closed(self): + return not hasattr(self, "handle") or self.handle is None + + @property + @abstractmethod + def device(self) -> ArrayDevice: + raise NotImplementedError + + @property + def adapter(self) -> ArrayAdapter: + return self._adapter diff --git a/histoqc/wsi_handles/constants.py b/histoqc/wsi_handles/constants.py index 16a297b..9fcec88 100644 --- a/histoqc/wsi_handles/constants.py +++ b/histoqc/wsi_handles/constants.py @@ -14,7 +14,7 @@ WSI_HANDLES: Dict[str, Tuple[str, str]] = { KEY_OPENSLIDE: (MODULE_OPENSLIDE, CLASS_OPENSLIDE), # todo: add unified interface - # KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM), + KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM), } HANDLE_DELIMITER = ',' diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py index e37172a..c4df848 100644 --- a/histoqc/wsi_handles/cuimage_handle.py +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -10,6 +10,7 @@ from lazy_property import LazyProperty import numpy as np from cucim import skimage as c_skimage +from histoqc.array_adapter import ArrayDevice class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): @@ -114,7 +115,9 @@ def get_thumbnail(self, new_dim): aspect_ratio = self.dimensions[0] / self.dimensions[1] target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) - return c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w)) + resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w)) + + return c_skimage.util.img_as_ubyte(resized) def get_best_level_for_downsample(self, down_factor: float) -> int: """Return the largest level that's smaller than the target downsample factor, consistent with openslide. @@ -164,8 +167,20 @@ def grid_stack(grid: List[List[cp.ndarray]]): @staticmethod def backend_dim(region: CuImage) -> Tuple[int, int]: - return cast(Tuple[int, int], tuple(region.size()[:2])) + return cast(Tuple[int, int], tuple(region.size()[:2][::-1])) @staticmethod def array_shape(arr: cp.ndarray) -> Tuple[int, ...]: return arr.shape + + def close_handle(self): + if hasattr(self, "handle") and self.handle is not None: + self.handle.close() + self.handle = None + if self.dummy_handle is not None: + self.dummy_handle.close() + self.dummy_handle = None + + @property + def device(self) -> ArrayDevice: + return ArrayDevice.CUDA diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py index 6388329..c426cf2 100644 --- a/histoqc/wsi_handles/openslide_handle.py +++ b/histoqc/wsi_handles/openslide_handle.py @@ -8,6 +8,7 @@ from PIL.Image import Image as PILImage from .utils import rgba2rgb_pil from PIL import Image +from histoqc.array_adapter import ArrayDevice class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]): @@ -143,3 +144,12 @@ def backend_dim(region: PILImage) -> Tuple[int, int]: @staticmethod def array_shape(arr: np.ndarray) -> Tuple[int, ...]: return arr.shape + + def close_handle(self): + if hasattr(self, "handle"): + self.handle.close() + self.handle = None + + @property + def device(self) -> ArrayDevice: + return ArrayDevice.CPU diff --git a/imported_functions_list.txt b/imported_functions_list.txt new file mode 100644 index 0000000..e69de29 From e35e1bac2316d5a80a8725405d1892ff4ca071ba Mon Sep 17 00:00:00 2001 From: CielAl Date: Thu, 2 May 2024 10:40:14 -0400 Subject: [PATCH 3/6] edge case wherein no comment is available from WSI --- histoqc/BasicModule.py | 3 ++- histoqc/ClassificationModule.py | 2 -- histoqc/wsi_handles/cuimage_handle.py | 8 ++++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py index fa90ff7..dfc59a7 100644 --- a/histoqc/BasicModule.py +++ b/histoqc/BasicModule.py @@ -17,7 +17,8 @@ def getBasicStats(s: BaseImage, params): s.addToPrintList("width", osh.dimensions[0] if len(osh.dimensions) >= 2 else "NA") s.addToPrintList("mpp_x", osh.mpp_x) s.addToPrintList("mpp_y", osh.mpp_y) - s.addToPrintList("comment", osh.comment.replace("\n", " ").replace("\r", " ")) + comment = osh.comment if osh.comment else "" + s.addToPrintList("comment", comment.replace("\n", " ").replace("\r", " ")) return diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index b2cbb4a..1cfa732 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -181,8 +181,6 @@ def compute_features(img, params): def byExampleWithFeatures(s: BaseImage, params): - device = s.image_handle.device - adapter = ArrayAdapter.build(input_device=device, output_device=device) name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}") diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py index c4df848..df23ee3 100644 --- a/histoqc/wsi_handles/cuimage_handle.py +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -11,6 +11,7 @@ import numpy as np from cucim import skimage as c_skimage from histoqc.array_adapter import ArrayDevice +import gc class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): @@ -173,13 +174,20 @@ def backend_dim(region: CuImage) -> Tuple[int, int]: def array_shape(arr: cp.ndarray) -> Tuple[int, ...]: return arr.shape + def release(self): + cp.get_default_memory_pool().free_all_blocks() + cp.get_default_pinned_memory_pool().free_all_blocks() + def close_handle(self): if hasattr(self, "handle") and self.handle is not None: self.handle.close() + del self.handle self.handle = None + gc.collect() if self.dummy_handle is not None: self.dummy_handle.close() self.dummy_handle = None + self.release() @property def device(self) -> ArrayDevice: From 39fa170eafed8e3e2846b7da1037ffc0b4f34795 Mon Sep 17 00:00:00 2001 From: CielAl Date: Sun, 5 May 2024 20:06:20 -0400 Subject: [PATCH 4/6] simple mp support for GPU --- histoqc/AnnotationModule.py | 6 +- histoqc/BaseImage.py | 41 +++- histoqc/BasicModule.py | 2 - histoqc/BlurDetectionModule.py | 3 +- histoqc/BubbleRegionByRegion.py | 5 +- histoqc/ClassificationModule.py | 17 +- histoqc/DeconvolutionModule.py | 1 - histoqc/HistogramModule.py | 9 +- histoqc/LightDarkModule.py | 5 +- histoqc/LocalTextureEstimationModule.py | 4 +- histoqc/MorphologyModule.py | 5 +- histoqc/TileExtractionModule.py | 8 +- histoqc/__main__.py | 87 +++++-- histoqc/_worker.py | 38 ++- histoqc/array_adapter/__init__.py | 2 +- histoqc/array_adapter/adapter.py | 314 +++++++++++++++++------- histoqc/array_adapter/func_mapping.py | 11 +- histoqc/import_wrapper/cupy_extra.py | 2 + histoqc/wsi_handles/base.py | 51 ++-- histoqc/wsi_handles/cuimage_handle.py | 119 ++++++--- histoqc/wsi_handles/openslide_handle.py | 38 ++- 21 files changed, 538 insertions(+), 230 deletions(-) diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py index b36a7fc..f5e1be8 100644 --- a/histoqc/AnnotationModule.py +++ b/histoqc/AnnotationModule.py @@ -1,7 +1,7 @@ import logging from typing import List, Tuple from histoqc.BaseImage import printMaskHelper, BaseImage -from histoqc.array_adapter import ArrayDevice, ArrayAdapter +from histoqc.array_adapter import ArrayAdapter, Device from skimage import io from skimage.util import img_as_ubyte import os @@ -78,7 +78,9 @@ def getParams(s: BaseImage, params): def saveAnnotationMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetAnnotationMask") # quite pointless to enforce GPU acceleration here. Force to use CPU mode - adaptor = ArrayAdapter.build(input_device=ArrayDevice.CPU, output_device=ArrayDevice.CPU) + adaptor = ArrayAdapter.build(input_device=Device.build(Device.DEVICE_CPU), + output_device=Device.build(Device.DEVICE_CPU), + contingent_device=Device.build(Device.DEVICE_CPU)) (ann_format, file_path, suffix) = getParams(s, params) diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index aa507b5..97fe60c 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -1,15 +1,19 @@ from __future__ import annotations import logging import os +import sys + import numpy as np import zlib +import traceback import dill from distutils.util import strtobool import re from typing import Union, Tuple, cast, Optional from histoqc.wsi_handles.base import WSIImageHandle -from histoqc.wsi_handles.constants import KEY_OPENSLIDE, KEY_CUCIM +from histoqc.wsi_handles.constants import KEY_CUCIM from histoqc.array_adapter.typing import TYPE_ARRAY +from histoqc.array_adapter import ArrayDeviceType _REGEX_MAG = r"^(\d?\.?\d*X?)" _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE) MAG_NA = None @@ -36,22 +40,33 @@ class BaseImage(dict): - _image_handle: WSIImageHandle + _image_handle: Optional[WSIImageHandle] + _device_id: Optional[int] @property - def image_handle(self) -> WSIImageHandle: - return self._image_handle + def image_handle(self) -> Optional[WSIImageHandle]: + if hasattr(self, "_image_handle"): + return self._image_handle + return None @image_handle.setter def image_handle(self, image_handle: WSIImageHandle): self._image_handle = image_handle - def __init__(self, fname, fname_outdir, params): + def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None): dict.__init__(self) + # init + self._device_id = device_id + self._image_handle = None handles = params.get("handles", KEY_CUCIM) - # dynamically load wsi image handle - self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles) + # dynamically load wsi image handle + try: + self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles, device_id=device_id) + except Exception: + trace_string = traceback.format_exc() + logging.error(f"{__name__}: {fname} -- Error Creating Handle - Traceback: {trace_string}") + sys.exit(1) self.in_memory_compression = strtobool(params.get("in_memory_compression", "False")) self["warnings"] = [''] # this needs to be first key in case anything else wants to add to it @@ -100,11 +115,13 @@ def __init__(self, fname, fname_outdir, params): def is_img_data(key: str) -> bool: return key.startswith("img") and key != "img_bbox" - def _sync_to_handle(self, key, value): + def _sync_to_handle(self, key, value, device: Optional[ArrayDeviceType] = None): if not self.__class__.is_img_data(key): return value if hasattr(self, "_image_handle") and self.image_handle is not None: - value = self.image_handle.adapter.sync(value) + device = device if device is not None else self.image_handle.device + value = self.image_handle.adapter.__class__.curate_arrays_device(value, + device=device, copy=False) return value def __getitem__(self, key): @@ -149,7 +166,7 @@ def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in osh.level_downsamples] level = np.where(relative_down_factors_idx)[0] if level.size: - return level[0], True + return cast(int, level[0]), True else: return osh.get_best_level_for_downsample(downsample_factor), False @@ -234,9 +251,11 @@ def getImgThumb(self, size: str) -> Optional[TYPE_ARRAY]: logging.info( f"{self['filename']} - \t\tloading image from level {target_level} of size" f" {image_handle.level_dimensions[target_level]}") + # PILLOW tile = image_handle.read_region((bx, by), target_level, size) + self[key] = (np.asarray(self.image_handle.backend_rgba2rgb(tile)) - if np.shape(tile)[-1] == 4 + if len(tile.getbands()) == 4 else np.asarray(tile)) # specifies a desired size of thumbnail diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py index dfc59a7..94cfe40 100644 --- a/histoqc/BasicModule.py +++ b/histoqc/BasicModule.py @@ -1,9 +1,7 @@ import logging import os from histoqc.BaseImage import printMaskHelper -from histoqc.array_adapter import ArrayAdapter, ArrayDevice from skimage.morphology import remove_small_objects, binary_opening, disk -from skimage import io from skimage.util import img_as_ubyte from histoqc.BaseImage import BaseImage diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py index 7ea4993..176a132 100644 --- a/histoqc/BlurDetectionModule.py +++ b/histoqc/BlurDetectionModule.py @@ -3,11 +3,10 @@ import skimage from histoqc.BaseImage import printMaskHelper, BaseImage -from skimage import io, morphology, measure +from skimage import morphology, measure from skimage.util import img_as_ubyte from skimage.color import rgb2gray import numpy as np -from histoqc.array_adapter import ArrayAdapter, FUNC_MAP, ArrayDevice # Analysis of focus measure operators for shape-from-focus # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012 diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py index fc2bf75..da253c5 100644 --- a/histoqc/BubbleRegionByRegion.py +++ b/histoqc/BubbleRegionByRegion.py @@ -9,7 +9,7 @@ from skimage.color import rgb2gray from skimage.morphology import remove_small_objects from histoqc.BaseImage import BaseImage -from skimage import io, color +from skimage import color import numpy as np @@ -47,7 +47,7 @@ def roiWise(s: BaseImage, params): # todo: confirm -- the original level is hardcoded to be 1, shouldn't it be the level variable? region = osh.region_backend((x, y), level, (win_size, win_size)) - region = osh.backend_to_array(region)[..., :3] + region = osh.backend_to_array(region, osh.device)[..., :3] g = adapter(rgb2gray)(region) # todo -- forward compatibility. Later version of frangi alters the signatures sigmas = frangi_scale_range + (frangi_scale_step,) @@ -114,4 +114,3 @@ def detectSmoothness(s: BaseImage, params): s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " f"detectable! Downstream modules likely to be incorrect/fail") return - diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 1cfa732..6c7f81f 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -2,7 +2,7 @@ import os import re import sys -from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.array_adapter import ArrayAdapter, Device from ast import literal_eval as make_tuple from distutils.util import strtobool @@ -15,8 +15,6 @@ from skimage.morphology import remove_small_objects, disk, dilation from skimage.feature import local_binary_pattern -from scipy import ndimage as ndi - from sklearn.naive_bayes import GaussianNB from sklearn.ensemble import RandomForestClassifier @@ -40,12 +38,13 @@ def pixelWise(s: BaseImage, params): # todo no formal support for GNB now # todo Possible solution: sklearn with array-api-compat and implement a wrapper into the ArrayAdaptor # todo Also: need to rework the GaussianNB.fit interface into a wrapper. - device = s.image_handle.device - adapter = ArrayAdapter.build(input_device=device, output_device=device) - img = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU) + device = s.image_handle.device_type + adapter: ArrayAdapter = s.image_handle.adapter + img = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]), + device=Device.build(Device.DEVICE_CPU)) gnb = GaussianNB() - gnb.fit(model_vals[:, 1:], model_vals[:, 0]) + adapter(gnb.fit)(model_vals[:, 1:], model_vals[:, 0]) cal = adapter(gnb.predict_proba)(img.reshape(-1, 3)) cal = cal.reshape(img.shape[0], img.shape[1], 2) @@ -87,7 +86,6 @@ def compute_laplace(img, params): return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None] - def compute_lbp(img, params): lbp_radius = float(params.get("lbp_radius", 3)) lbp_points = int(params.get("lbp_points", 24)) # example sets radius * 8 @@ -99,7 +97,6 @@ def compute_lbp(img, params): return adapter(local_binary_pattern)(img_gray, P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] - def compute_gaussian(img, params): adapter = params["adapter"] gaussian_sigma = int(params.get("gaussian_sigma", 1)) @@ -242,8 +239,8 @@ def byExampleWithFeatures(s: BaseImage, params): # do stuff here with model_vals model_vals = np.vstack(model_vals) clf = RandomForestClassifier(n_jobs=-1) - # adapter(clf.fit)(model_vals, model_labels.ravel()) adapter(clf.fit)(model_vals, y=model_labels.ravel()) + # clf.fit(model_vals, y=model_labels.ravel()) params["shared_dict"]["model_" + name] = clf logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done") diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py index f3b4613..91a943e 100644 --- a/histoqc/DeconvolutionModule.py +++ b/histoqc/DeconvolutionModule.py @@ -2,7 +2,6 @@ import os import sys import numpy as np -from skimage import io from skimage.util import img_as_ubyte from histoqc.BaseImage import BaseImage from skimage.color import separate_stains diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py index 901beeb..0dd8a21 100644 --- a/histoqc/HistogramModule.py +++ b/histoqc/HistogramModule.py @@ -6,7 +6,7 @@ from distutils.util import strtobool from histoqc.BaseImage import BaseImage from typing import Union -from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.array_adapter import ArrayAdapter, Device from histoqc.array_adapter.typing import TYPE_ARRAY # todo: beware that because there is no lock, it is likely that each worker will compute the template of their own. # this holds a local copy of the histograms of the template images so that they need only be computed once @@ -15,15 +15,16 @@ def getHistogram(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetHistogram") - adapter = s.image_handle.adapter + # adapter = s.image_handle.adapter limit_to_mask = strtobool(params.get("limit_to_mask", True)) bins = int(params.get("bins", 20)) img = s.getImgThumb(s["image_work_size"]) tissue_mask = s["img_mask_use"] # matplotlib --> pointless to use GPU here even if a corresponding API exists - img = adapter.move_to_device(img, ArrayDevice.CPU) - tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU) + img, tissue_mask = ArrayAdapter.curate_arrays_device(img, tissue_mask, + device=Device.build(Device.DEVICE_CPU)) + # tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU) if limit_to_mask: img = img[tissue_mask] else: diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py index 4fe8a8d..2312089 100644 --- a/histoqc/LightDarkModule.py +++ b/histoqc/LightDarkModule.py @@ -3,7 +3,7 @@ import numpy as np from histoqc.BaseImage import printMaskHelper, BaseImage from skimage import io, color -from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.array_adapter import ArrayAdapter, Device from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage.filters import threshold_otsu, rank @@ -184,7 +184,8 @@ def saveEqualisedImage(s: BaseImage, params): img = adapter(color.rgb2gray)(img) img_u8 = adapter(img_as_ubyte)(img) out = adapter(exposure.equalize_hist)(img_u8) - out_u8 = adapter.move_to_device(adapter(img_as_ubyte)(out), ArrayDevice.CPU) + out_u8 = ArrayAdapter.curate_arrays_device(adapter(img_as_ubyte)(out), + device=Device.build(Device.DEVICE_CPU)) io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", out_u8) return diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py index 2e28762..d7babd3 100644 --- a/histoqc/LocalTextureEstimationModule.py +++ b/histoqc/LocalTextureEstimationModule.py @@ -3,7 +3,7 @@ from skimage import color from distutils.util import strtobool from skimage.feature import graycomatrix, graycoprops -from histoqc.array_adapter import ArrayAdapter, ArrayDevice +from histoqc.array_adapter import ArrayAdapter, Device from histoqc.BaseImage import BaseImage @@ -31,7 +31,7 @@ def estimateGreyComatrixFeatures(s: BaseImage, params): return maskidx = mask.nonzero() - maskidx = ArrayAdapter.new_array(maskidx, array_device=ArrayDevice.CPU).transpose() + maskidx = ArrayAdapter.new_array(maskidx, array_device=Device.build(Device.DEVICE_CPU)).transpose() idx = np.random.choice(maskidx.shape[0], npatches) results = [] diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py index 4d2c301..426a358 100644 --- a/histoqc/MorphologyModule.py +++ b/histoqc/MorphologyModule.py @@ -4,10 +4,8 @@ from histoqc.BaseImage import printMaskHelper, BaseImage from histoqc.array_adapter import ArrayAdapter from histoqc.array_adapter.typing import TYPE_ARRAY -from skimage import io, morphology, measure +from skimage import morphology, measure from skimage.util import img_as_ubyte -from scipy import ndimage as ndi -from typing import cast def removeSmallObjects(s: BaseImage, params): @@ -109,6 +107,7 @@ def removeFatlikeTissue(s, params): f"detectable! Downstream modules likely to be incorrect/fail") return + def fillSmallHoles(s, params): logging.info(f"{s['filename']} - \tfillSmallHoles") adapter = s.image_handle.adapter diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py index 65d0383..e3cf69e 100644 --- a/histoqc/TileExtractionModule.py +++ b/histoqc/TileExtractionModule.py @@ -6,7 +6,7 @@ import os import json from histoqc.BaseImage import BaseImage -from histoqc.array_adapter import ArrayDevice +from histoqc.array_adapter import Device from typing import Callable, Dict, Any, List, Tuple, Union import numpy as np from PIL import Image, ImageDraw @@ -36,6 +36,7 @@ TYPE_BBOX_INT = Tuple[int, int, int, int] +# noinspection PyUnusedLocal def default_screen_identity(img: np.ndarray): return True @@ -570,8 +571,9 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]): tile_stride = int(params.get('tile_stride', 256)) tissue_thresh = float(params.get('tissue_ratio', 0.5)) # no added value from GPU acceleration (except for read_region) as the procedure is sequential - img_use_for_tiles = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU) - mask_use_for_tiles = adapter.move_to_device(s['img_mask_use'], ArrayDevice.CPU) + img_use_for_tiles, mask_use_for_tiles = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]), + s['img_mask_use'], + device=Device.build(Device.DEVICE_CPU)) image_handle = s.image_handle img_w, img_h = image_handle.dimensions diff --git a/histoqc/__main__.py b/histoqc/__main__.py index 430d33f..8462cc1 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -8,6 +8,7 @@ import sys import time from functools import partial +from typing import Tuple, Optional, List from histoqc._pipeline import BatchedResultFile from histoqc._pipeline import MultiProcessingLogManager @@ -20,9 +21,55 @@ from histoqc._worker import worker_setup from histoqc._worker import worker_success from histoqc._worker import worker_error +from histoqc._worker import PARAM_SHARE, device_assign, KEY_ASSIGN from histoqc.config import read_config_template from histoqc.data import managed_pkg_data from histoqc.wsi_handles.constants import KEY_CUCIM +from histoqc.array_adapter.adapter import cupy_installed +from histoqc.import_wrapper.cupy_extra import cp + + +def parse_config(args: argparse.Namespace) -> Tuple[configparser.ConfigParser, Optional[str]]: + config = configparser.ConfigParser() + msg = None + if not args.config: + msg = f"Configuration file not set (--config), using default" + config.read_string(read_config_template('default')) + elif os.path.exists(args.config): + config.read(args.config) # Will read the config file + else: + msg = f"Configuration file {args.config} assuming to be a template...checking." + config.read_string(read_config_template(args.config)) + return config, msg + + +def _get_device_list(n_proc: int): + return list(range(n_proc)) if n_proc > 0 else [0] + + +def parse_multiprocessing(args: argparse.Namespace, + config: configparser.ConfigParser) -> Tuple[argparse.Namespace, List[int]]: + is_multiproc = args.nprocesses >= 0 + is_cuda = KEY_CUCIM in config["BaseImage.BaseImage"].get("handles", "") + + # if use cuda but without installation of dependencies - return + if not is_cuda or not cupy_installed(): + return args, _get_device_list(args.nprocesses) + # guard + assert is_cuda and cp is not None, f"Enable CUDA but cupy is not installed" + # set spawn + if is_multiproc: + multiprocessing.set_start_method("spawn", force=True) + num_devices = cp.cuda.runtime.getDeviceCount() + # n_proc cannot exceed num of GPUs. + assert num_devices > 0, f"Fail to detect usable CUDA devices" + if args.nprocesses > num_devices: + logging.warning(f"{__name__}: CUDA enabled but number of processes is greater than number of devices:" + f"{args.nprocesses} > {num_devices}. Cutoff the number of processes to {num_devices}") + args.nprocesses = min(args.nprocesses, num_devices) + # device list --> if + device_list = _get_device_list(args.nprocesses) + return args, device_list @managed_pkg_data @@ -69,28 +116,17 @@ def main(argv=None): # --- multiprocessing and logging setup ----------------------------------- # todo: move config parsing above the mpm initialization and set the start method accordingly # multiprocessing.set_start_method("spawn") + setup_logging(capture_warnings=True, filter_warnings='ignore') - mpm = multiprocessing.Manager() - lm = MultiProcessingLogManager('histoqc', manager=mpm) # --- parse the pipeline configuration ------------------------------------ + config, conf_warn_msg = parse_config(args) + args, device_list = parse_multiprocessing(args, config) + mpm = multiprocessing.Manager() + lm = MultiProcessingLogManager('histoqc', manager=mpm) + if conf_warn_msg: + lm.logger.warning(conf_warn_msg) - config = configparser.ConfigParser() - if not args.config: - lm.logger.warning(f"Configuration file not set (--config), using default") - config.read_string(read_config_template('default')) - elif os.path.exists(args.config): - config.read(args.config) # Will read the config file - else: - lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.") - config.read_string(read_config_template(args.config)) - - # todo: for cuda --> must use spawn method if CUDA is enabled. - # todo: however a better memory management scheme should be tested. - # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime - # todo: at this moment we simply override the args.nprocesses - if config["BaseImage.BaseImage"].get("handles") == KEY_CUCIM: - args.nprocesses = 0 # --- provide models, pen and templates as fallbacks from package data ---- managed_pkg_data.inject_pkg_data_fallback(config) @@ -165,21 +201,27 @@ def main(argv=None): 'outdir': args.outdir, 'log_manager': lm, 'lock': mpm.Lock(), - 'shared_dict': mpm.dict(), + PARAM_SHARE: mpm.dict(), 'num_files': num_files, 'force': args.force, } + # init the dict of device assignment + _shared_state[PARAM_SHARE][KEY_ASSIGN] = mpm.dict() failed = mpm.list() setup_plotting_backend(lm.logger) try: - if args.nprocesses > 1: + # todo: for cuda --> must use spawn method if CUDA is enabled. + # todo: however a better memory management scheme should be tested. + # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime + # todo: at this moment we simply override the args.nprocesses + + if args.nprocesses > 0: with lm.logger_thread(): - print(args.nprocesses) with multiprocessing.Pool(processes=args.nprocesses, initializer=worker_setup, - initargs=(config,)) as pool: + initargs=(config, device_list, _shared_state)) as pool: try: for idx, file_name in enumerate(files): _ = pool.apply_async( @@ -197,6 +239,7 @@ def main(argv=None): else: for idx, file_name in enumerate(files): try: + device_assign(device_list, _shared_state[PARAM_SHARE]) _success = worker(idx, file_name, **_shared_state) except Exception as exc: worker_error(exc, failed) diff --git a/histoqc/_worker.py b/histoqc/_worker.py index ecf4e6a..163a998 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -1,18 +1,41 @@ """histoqc worker functions""" +import multiprocessing import os import shutil import traceback from histoqc.BaseImage import BaseImage from histoqc._pipeline import load_pipeline from histoqc._pipeline import setup_plotting_backend +from typing import Dict, List, Optional +from multiprocessing import managers + +KEY_ASSIGN: str = 'device_assign' +PARAM_SHARE: str = 'shared_dict' # --- worker functions -------------------------------------------------------- +def id_assign_helper(device_id_list: List[int], assign_dict: managers.DictProxy): + pid = os.getpid() + for device_id in device_id_list: + if device_id not in assign_dict.values(): + assign_dict[pid] = device_id + return + + +def device_assign(device_id_list: List[int], shared_dict: managers.DictProxy): + """Initializer to configure each worker with a specific GPU.""" + shared_dict[KEY_ASSIGN] = shared_dict.get(KEY_ASSIGN, None) + assert shared_dict[KEY_ASSIGN] is not None + assert KEY_ASSIGN in shared_dict + id_assign_helper(device_id_list, shared_dict[KEY_ASSIGN]) + -def worker_setup(c): +def worker_setup(c, device_id_list: List[int], state: Dict): """needed for multiprocessing worker setup""" setup_plotting_backend() + shared_dict = state[PARAM_SHARE] load_pipeline(config=c) + device_assign(device_id_list, shared_dict) def worker(idx, file_name, *, @@ -35,9 +58,15 @@ def worker(idx, file_name, *, os.makedirs(fname_outdir) log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") + device_id = shared_dict[KEY_ASSIGN].get(os.getpid(), None) + if device_id is None: + log_manager.logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id." + f"Default: use 0 for CUDA devices.") + s: Optional[BaseImage] = None try: - s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage"))) + s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")), + device_id=device_id) for process, process_params in process_queue: process_params["lock"] = lock process_params["shared_dict"] = shared_dict @@ -45,7 +74,10 @@ def worker(idx, file_name, *, s["completed"].append(process.__name__) except Exception as exc: # reproduce histoqc error string - _oneline_doc_str = exc.__doc__.replace('\n', '') + if s is not None: + s.image_handle.release() + print(f"DBG: {__name__}: {exc}") + _oneline_doc_str = exc.__doc__.replace('\n', '') if exc.__doc__ is not None else '' err_str = f"{exc.__class__} {_oneline_doc_str} {exc}" trace_string = traceback.format_exc() log_manager.logger.error( diff --git a/histoqc/array_adapter/__init__.py b/histoqc/array_adapter/__init__.py index c43f502..4ed63c5 100644 --- a/histoqc/array_adapter/__init__.py +++ b/histoqc/array_adapter/__init__.py @@ -1,2 +1,2 @@ from .func_mapping import FUNC_MAP -from .adapter import ArrayAdapter, ArrayDevice +from .adapter import ArrayAdapter, ArrayDeviceType, Device diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py index 17c46e3..52b3a15 100644 --- a/histoqc/array_adapter/adapter.py +++ b/histoqc/array_adapter/adapter.py @@ -3,7 +3,7 @@ from histoqc.array_adapter.func_mapping import FUNC_MAP import numpy as np from numbers import Number -from typing import Callable, Mapping, Tuple, Optional, Any, Iterable +from typing import Callable, Mapping, Tuple, Optional, Any, Iterable, Dict from typing_extensions import Self, TypeGuard from histoqc.import_wrapper.cupy_extra import cupy as cp from enum import Enum @@ -11,6 +11,7 @@ import functools from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv import skimage +import re def cupy_installed() -> bool: @@ -21,7 +22,7 @@ def cupy_installed() -> bool: return False -class ArrayDevice(Enum): +class ArrayDeviceType(Enum): CPU: str = 'cpu' CUDA: str = 'cuda' @@ -36,7 +37,7 @@ def from_str(cls, str_val: str): return cls(str_val) @classmethod - def build(cls, value: str | Self): + def build(cls, value: str | Self) -> Self: if isinstance(value, cls): return value if isinstance(value, str): @@ -44,14 +45,94 @@ def build(cls, value: str | Self): raise TypeError(f'Unexpected type {type(value)}') +class Device: + __device_type: ArrayDeviceType + __device_id: Optional[int] + __instances: Dict[Tuple[ArrayDeviceType, Optional[int]], Self] = dict() + _is_initialized: bool + DEFAULT_ID: int = 0 + + DEVICE_CPU: str = 'cpu' + DEVICE_CUDA: str = 'cuda' + + def is_cpu(self) -> bool: + return self.__device_type is ArrayDeviceType.CPU + + def is_cuda(self) -> bool: + return self.__device_type is ArrayDeviceType.CUDA + + @property + def device_type(self) -> ArrayDeviceType: + return self.__device_type + + @property + def device_id(self) -> Optional[int]: + return self.__device_id + + def __init__(self, device_type: ArrayDeviceType, device_id: Optional[int] = None) -> None: + if not hasattr(self, "_is_initialized") or not self._is_initialized: + self.__device_type = device_type + if self.is_cuda() and device_id is None: + device_id = Device.DEFAULT_ID + self.__device_id = device_id + self._is_initialized = True + + def __repr__(self) -> str: + dev_id = f":{self.device_id}" if self.device_type is ArrayDeviceType.CUDA else "" + return f"{self.device_type.value}{dev_id}" + + def __reduce__(self): + # Return a tuple representing the pickling state + return self.__class__, (self.__device_type, self.__device_id) + + def __new__(cls, device_type: ArrayDeviceType, device_id: Optional[int] = None): + device_id = device_id if device_type is ArrayDeviceType.CUDA else None + if device_type is ArrayDeviceType.CUDA and device_id is None: + device_id = cls.DEFAULT_ID + + key = (device_type, device_id) + if key not in cls.__instances: + inst = super().__new__(cls) + cls.__instances[key] = inst + return cls.__instances[key] + + @classmethod + def parse_input(cls, device: str | int) -> Tuple[ArrayDeviceType, Optional[int]]: + assert device is not None, f"device must not be None" + if isinstance(device, int): + return ArrayDeviceType.CUDA, device + + assert isinstance(device, str), f"device must either be int (GPU:device) or str (cpu|cuda)[:device]" + regex = r'^(cpu|cuda)(:(\d+))?$' + match = re.match(regex, device) + + assert match is not None and len(match.groups()) == 3, f"Unexpected input format: {device}" + groups = match.groups() + + if groups[0] == cls.DEVICE_CPU: + # Handle the "cpu" case + return ArrayDeviceType.CPU, None + elif groups[0] == cls.DEVICE_CUDA and groups[2] is None: + return ArrayDeviceType.CUDA, cls.DEFAULT_ID + elif groups[0] == cls.DEVICE_CUDA and groups[2] is not None: + device_id = int(groups[2]) + return ArrayDeviceType.CUDA, device_id + raise ValueError(f"Unexpected input format: {device}") + + @classmethod + def build(cls, device: str | int): + device_type, device_id = cls.parse_input(device) + return cls(device_type, device_id) + + class ArrayAdapter(Callable): func_map: Mapping[Callable, Callable] - input_device: Optional[str | ArrayDevice] - output_device: Optional[str | ArrayDevice] - contingent_type: ArrayDevice + input_device: Optional[str | Device] + output_device: Optional[str | Device] + contingent_device: Device - TYPE_CONTINGENT_DEFAULT: ArrayDevice = ArrayDevice.CUDA + id: int @staticmethod def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]: @@ -69,63 +150,81 @@ def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP: return arr.get() @staticmethod - def to_cupy(arr: TYPE_ARRAY | Number, copy: bool = False) -> TYPE_CP: + def to_cupy(arr: TYPE_ARRAY | Number, device: Device, copy: bool = False) -> TYPE_CP: assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \ f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}" - return cp.array(arr, copy=copy) + assert device is not None and isinstance(device, Device) and device.is_cuda(), f"{device} is not CUDA device" + with cp.cuda.Device(device.device_id): + return cp.array(arr, copy=copy) @staticmethod - def array_device_type(arr: TYPE_ARRAY) -> ArrayDevice: - on_cpu = isinstance(arr, np.ndarray) - return ArrayDevice.from_bool(on_cpu) + def array_device_type(arr: TYPE_ARRAY) -> Device: + on_cpu = ArrayAdapter.is_numpy(arr) + if on_cpu: + return Device.build(Device.DEVICE_CPU) + assert cupy_installed() and ArrayAdapter.is_cupy(arr) + return Device.build(f"{Device.DEVICE_CUDA}:{arr.device.id}") @staticmethod def is_array(arr: TYPE_ARRAY) -> bool: - return isinstance(arr, np.ndarray) or (cupy_installed() and isinstance(arr, cp.ndarray)) + return ArrayAdapter.is_numpy(arr) or ArrayAdapter.is_cupy(arr) @classmethod - def new_array(cls, arr: Any, array_device: ArrayDevice) -> TYPE_ARRAY: + def new_array(cls, arr: Any, array_device: Device) -> TYPE_ARRAY: if cls.is_array(arr): - return cls.move_to_device(arr, array_device, copy=False) + return cls._move_to_device(arr, array_device, copy=True) if isinstance(arr, Iterable): - arr = cls.curate_array_device(*arr, device=array_device, copy=False) - if array_device is array_device.CUDA: + arr = cls.curate_arrays_device(*arr, device=array_device, copy=True) + if array_device.is_cuda(): assert cupy_installed() - return cp.asarray(arr) - elif array_device is array_device.CPU: + with cp.cuda.Device(array_device.device_id): + return cp.asarray(arr) + elif array_device.DEVICE_CPU: return np.asarray(arr) - raise TypeError(f'Unexpected device {ArrayDevice}') + raise TypeError(f'Unexpected device {ArrayDeviceType}') def asarray(self, arr: Any) -> TYPE_ARRAY: return self.__class__.new_array(arr, self.output_device) @classmethod - def move_to_device(cls, - arr: Optional[TYPE_ARRAY], - device: Optional[ArrayDevice], copy: bool = False) -> TYPE_ARRAY: + def _move_to_device(cls, + arr: Optional[TYPE_ARRAY], + device: Optional[Device], copy: bool = False) -> TYPE_ARRAY: # structural match > py3.10 if device is None or not cls.is_array(arr): return arr - if device is ArrayDevice.CPU: + assert device is not None + if device.is_cpu(): return ArrayAdapter.to_numpy(arr, copy=copy) - elif device is ArrayDevice.CUDA: - return ArrayAdapter.to_cupy(arr, copy=copy) + elif device.is_cuda(): + return ArrayAdapter.to_cupy(arr, device, copy=copy) raise ValueError(f"Unsupported device: {device}") def sync(self, arr: Optional[TYPE_ARRAY | Number], copy: bool = False) -> TYPE_ARRAY: if not self.__class__.is_array(arr): return arr - return self.__class__.move_to_device(arr, device=self.output_device, copy=copy) + return self.__class__._move_to_device(arr, device=self.output_device, copy=copy) @classmethod - def curate_device_helper(cls, output: TYPE_ARRAY, device: Optional[ArrayDevice], copy: bool): - if output is not None and cls.is_array(output): - output = cls.move_to_device(output, device, copy=copy) - return output + def curate_device_helper(cls, arr: TYPE_ARRAY, device: Optional[Device], copy: bool): + if arr is not None and cls.is_array(arr): + arr = cls._move_to_device(arr, device, copy=copy) + return arr @classmethod - def curate_array_device(cls, *arrays: TYPE_ARRAY, - device: Optional[ArrayDevice], copy: bool) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + def curate_arrays_device(cls, *arrays: TYPE_ARRAY, + device: Optional[Device], + copy: bool = False) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + """Curate the device type of one or more arrays. + + Args: + *arrays: + device: + copy: + + Returns: + + """ # already an array - no need to recursively unpack if cls.is_array(arrays): return cls.curate_device_helper(arrays, device, copy) @@ -142,74 +241,99 @@ def curate_array_device(cls, *arrays: TYPE_ARRAY, @staticmethod def get_api(cpu_func: Callable, - func_map: Mapping[Callable, Callable], device_type: ArrayDevice) -> Tuple[Callable, ArrayDevice]: - if device_type == ArrayDevice.CPU: - return cpu_func, ArrayDevice.CPU + func_map: Mapping[Callable, Callable], + device: Optional[Device]) -> Tuple[Callable, Device]: + if device is None: + logging.warning(f"Device unspecified in both input data and input device. Try: gpu:0") + device = Device.build(Device.DEVICE_CUDA) + if device.is_cpu(): + return cpu_func, device + assert device.is_cuda() mapped = func_map.get(cpu_func, None) if mapped is not None: - return mapped, ArrayDevice.CUDA + return mapped, device # if not implemented func_name = getattr(cpu_func, '__qualname__', cpu_func.__name__) logging.info(f"{__name__}: {func_name} does not have a GPU implementation. Revert to CPU") - return cpu_func, ArrayDevice.CPU + return cpu_func, Device.build(Device.DEVICE_CPU) + + @classmethod + def call_helper(cls, func_in: Optional[TYPE_ARRAY], func_device: Device, func: Callable, *curated_args, + **curated_kwargs): + if cls.is_cupy(func_in) or (func_in is None and func_device.is_cuda()): + assert func_in is None or func_in.device.id == func_device.device_id, \ + f"{func_device} mismatch {func_in is None or func_in.device}" + with cp.cuda.Device(func_device.device_id): + return func(func_in, *curated_args, **curated_kwargs) + else: + return func(func_in, *curated_args, **curated_kwargs) @classmethod def unified_call(cls, cpu_func: Callable, func_map: Mapping[Callable, Callable], - input_device: Optional[str | ArrayDevice], - output_device: Optional[str | ArrayDevice], + input_device: Optional[Device], + output_device: Optional[Device], data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: # use input_device to override the current device, if not None - data = cls.curate_array_device(data, device=input_device, copy=False) - input_type = cls.array_device_type(data) - # attempt to fetch the op, revert to CPU if GPU impl is not available + data = cls.curate_arrays_device(data, device=input_device, copy=False) + # if data is None --> use input device. + # if input_device is None, by default will invoke GPU interface + input_type = cls.array_device_type(data) if data is not None else input_device + # attempt to fetch the op, revert to CPU if GPU impl is not available (func=GPU impl, func_device=cuda func, func_device = cls.get_api(cpu_func, func_map, input_type) - func_in = cls.curate_array_device(data, device=func_device, copy=False) + func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True) - curated_args = cls.curate_array_device(*args, device=func_device, copy=False) + curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True) curated_kwargs = dict() - for k, v in kwargs.items(): - curated_kwargs[k] = cls.curate_array_device(v, device=func_device, copy=False) + curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=True) - output = func(func_in, *curated_args, **curated_kwargs) + output = cls.call_helper(func_in, func_device, func, *curated_args, **curated_kwargs) # only move the output around if the output is an array if isinstance(output, tuple): - return cls.curate_array_device(*output, device=output_device, copy=False) - return cls.curate_array_device(output, device=output_device, copy=False) + return cls.curate_arrays_device(*output, device=output_device, copy=True) + return cls.curate_arrays_device(output, device=output_device, copy=True) @classmethod - def _validate_device(cls, device_type: Optional[ArrayDevice]) -> Optional[ArrayDevice]: - if device_type is None: + def _validate_device(cls, device: Optional[Device | str | int]) -> Optional[Device]: + if device is None: return None - if device_type is ArrayDevice.CPU: - return device_type - assert device_type is ArrayDevice.CUDA, f"Unsupported device_type: {device_type}" + if isinstance(device, (str, int)): + device = Device.build(device) + + assert isinstance(device, Device), f"{type(device)}" + + if device.is_cpu(): + return device + + assert device.is_cuda(), f"Unsupported device_type: {device}" if not cupy_installed(): logging.info(f"Cupy is not installed. Revert to CPU") - return ArrayDevice.CPU - return device_type + return Device.build(Device.DEVICE_CPU) + return device def __init__(self, - input_device: Optional[str | ArrayDevice], - output_device: Optional[str | ArrayDevice], + input_device: Optional[str | int | Device], + output_device: Optional[str | int | Device], func_map: Mapping[Callable, Callable], - contingent_type: ArrayDevice, + contingent_device: Device, ): self.input_device = self.__class__._validate_device(input_device) self.output_device = self.__class__._validate_device(output_device) self.func_map = func_map - self.contingent_type = contingent_type + self.contingent_device = contingent_device @classmethod def build(cls, - input_device: Optional[str | ArrayDevice], - output_device: Optional[str | ArrayDevice], + input_device: Optional[str | int | Device], + output_device: Optional[str | int | Device], func_map: Mapping[Callable, Callable] = FUNC_MAP, - contingent_type: ArrayDevice = TYPE_CONTINGENT_DEFAULT): + contingent_device: Device = None) -> Self: + if contingent_device is None: + contingent_device = output_device return cls(input_device=input_device, output_device=output_device, func_map=func_map, - contingent_type=contingent_type) + contingent_device=contingent_device) def apply(self, /, cpu_func: Callable, data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: return self.unified_call(cpu_func, self.func_map, self.input_device, self.output_device, @@ -227,90 +351,100 @@ def __sync_device_output_helper(*arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ @classmethod def device_sync_all_helper(cls, *arrays: TYPE_ARRAY, - array_device: Optional[ArrayDevice], - contingent_type: Optional[ArrayDevice]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + array_device: Optional[Device], + contingent_device: Optional[Device]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: assert isinstance(arrays, tuple), f"input check. {type(arrays)} is not a tuple" if array_device is not None: - return cls.__sync_device_output_helper(tuple(cls.move_to_device(arr, array_device) for arr in arrays)) + return cls.__sync_device_output_helper(tuple(cls._move_to_device(arr, array_device) for arr in arrays)) assert array_device is None - has_contingent_device = any(cls.array_device_type(arr) is contingent_type for arr in arrays) + has_contingent_device = any(cls.array_device_type(arr) is contingent_device for arr in arrays) if has_contingent_device: - assert contingent_type is not None + assert contingent_device is not None return cls.__sync_device_output_helper(cls.device_sync_all_helper(*arrays, - array_device=contingent_type, - contingent_type=None)) + array_device=contingent_device, + contingent_device=None)) return cls.__sync_device_output_helper(arrays) def device_sync_all(self, *arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + """Synchronize the device of all arrays to be + + Args: + *arrays: + + Returns: + + """ return self.__class__.device_sync_all_helper(*arrays, array_device=self.output_device, - contingent_type=self.contingent_type) + contingent_device=self.contingent_device) @classmethod def binary_operation(cls, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY, - input_device: Optional[ArrayDevice], - output_device: Optional[ArrayDevice], - contingent_type: Optional[ArrayDevice], + input_device: Optional[Device], + output_device: Optional[Device], + contingent_device: Optional[Device], op: Callable) -> TYPE_ARRAY: arr1, arr2 = cls.device_sync_all_helper(arr1, arr2, array_device=input_device, - contingent_type=contingent_type) + contingent_device=contingent_device) result: TYPE_ARRAY = op(arr1, arr2) - return cls.curate_array_device(result, device=output_device, copy=False) + return cls.curate_arrays_device(result, device=output_device, copy=True) def and_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=and_) def or_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=or_) def add(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=add) def sub(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=sub) def mul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=mul) def matmul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=matmul) def truediv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=truediv) def floordiv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=floordiv) def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device, output_device=self.output_device, - contingent_type=self.contingent_type, + contingent_device=self.contingent_device, op=xor) @classmethod - def imsave(cls, filename: str, arr: TYPE_ARRAY): - arr = cls.curate_array_device(arr, device=ArrayDevice.CPU, copy=False) - return skimage.io.imsave(filename, arr) + def imsave(cls, filename: str, arr: TYPE_ARRAY, **kwargs): + logging.debug(f"{__name__}: SHAPE DBG {arr.shape}") + arr = cls.curate_arrays_device(arr, device=Device.build(Device.DEVICE_CPU), copy=True) + logging.debug(f"{__name__}: TYPE DBG {type(arr)}") + return skimage.io.imsave(filename, arr, **kwargs) diff --git a/histoqc/array_adapter/func_mapping.py b/histoqc/array_adapter/func_mapping.py index aa55b46..8ecc0ca 100644 --- a/histoqc/array_adapter/func_mapping.py +++ b/histoqc/array_adapter/func_mapping.py @@ -1,5 +1,6 @@ import scipy.ndimage import skimage +import sklearn from typing import Callable, Mapping from scipy import ndimage as sci_ndi import numpy as np @@ -9,7 +10,13 @@ from cupyx.scipy import signal as cu_signal from cucim import skimage as cu_skimage from cupyx.scipy import ndimage as cu_ndi + from sklearn import naive_bayes as sk_naive_bayes + from cuml import naive_bayes as cuml_naive_bayes + import cuml + from cuml import ensemble as cuml_ensemble + from sklearn import ensemble as sk_ensemble + # noinspection PyUnresolvedReferences FUNC_MAP: Mapping[Callable, Callable] = { skimage.color.convert_colorspace: cu_skimage.color.convert_colorspace, skimage.color.rgb2gray: cu_skimage.color.rgb2gray, @@ -45,7 +52,9 @@ skimage.filters.rank.minimum: None, # cu_skimage.morphology.erosion, sci_signal.convolve2d: cu_signal.convolve2d, sci_ndi.generate_binary_structure: cu_ndi.generate_binary_structure, - np.digitize: cp.digitize + np.digitize: cp.digitize, + # sk_naive_bayes.GaussianNB: cuml_naive_bayes.GaussianNB, + # sk_ensemble.RandomForestClassifier: cuml_ensemble.RandomForestClassifier, } except ImportError: FUNC_MAP = dict() diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py index c67d3e1..33d6586 100644 --- a/histoqc/import_wrapper/cupy_extra.py +++ b/histoqc/import_wrapper/cupy_extra.py @@ -1,5 +1,7 @@ try: import cupy + # cupy.cuda.set_allocator(None) + cupy.cuda.set_allocator(cupy.cuda.MemoryPool().malloc) except ImportError: cupy = None finally: diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py index 9a65a71..56bf82e 100644 --- a/histoqc/wsi_handles/base.py +++ b/histoqc/wsi_handles/base.py @@ -3,11 +3,11 @@ from histoqc.import_wrapper import dynamic_import import logging -from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic +from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic, Optional, cast import numpy as np from PIL.Image import Image as PILImage from typing_extensions import final -from histoqc.array_adapter import ArrayDevice, ArrayAdapter +from histoqc.array_adapter import ArrayDeviceType, ArrayAdapter, Device import os from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER @@ -22,6 +22,11 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]): handle: T fname: str _adapter: ArrayAdapter + _device: Device + + @property + def device(self) -> Device: + return self._device @staticmethod def curate_shorter_edge(width, height, limit, aspect_ratio): @@ -188,7 +193,7 @@ def array_shape(arr: ARRAY) -> Tuple[int, int]: @staticmethod @abstractmethod - def backend_to_array(region: Union[Backend, ARRAY]) -> ARRAY: + def backend_to_array(region: Union[Backend, ARRAY], device: Optional[Device]) -> ARRAY: ... def read_region(self, location, level, size, **kwargs) -> PILImage: @@ -205,7 +210,7 @@ def read_macro(self) -> Backend: @classmethod @abstractmethod - def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int]) -> ARRAY: + def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int], device: Optional[Device]) -> ARRAY: ... @abstractmethod @@ -216,12 +221,12 @@ def curated_best_level_for_downsample(self, downsample_factor: float) -> Tuple[i relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in self.level_downsamples] level = np.where(relative_down_factors_idx)[0] if level.size: - return level[0], True + return cast(int, level[0]), True return self.get_best_level_for_downsample(downsample_factor), False @staticmethod @abstractmethod - def grid_stack(grid: List[List[ARRAY]]): + def grid_stack(grid: List[List[ARRAY]], device: Optional[device]) -> ARRAY: ... def resize_tile_downward(self, target_downsampling_factor, level, @@ -260,15 +265,16 @@ def resize_tile_downward(self, target_downsampling_factor, level, **read_region_kwargs) if np.shape(closest_region)[-1] == 4: closest_region = self.backend_rgba2rgb(closest_region) - closest_region_arr = self.__class__.backend_to_array(closest_region) + closest_region_arr = self.__class__.backend_to_array(closest_region, self.device) target_region = self.__class__.region_resize_arr(closest_region_arr, - (win_down_width, win_down_height)) + (win_down_width, win_down_height), + device=self.device) row_piece.append(target_region) # row_piece = np.concatenate(row_piece, axis=0) grid.append(row_piece) # grid = np.concatenate(output, axis=1) # - return self.__class__.grid_stack(grid) + return self.__class__.grid_stack(grid, device=self.device) def best_thumb(self, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float, **read_region_kwargs) -> ARRAY: @@ -276,15 +282,15 @@ def best_thumb(self, x: int, y: int, dims: Tuple[int, int], # get thumb from og if not self.has_bounding_box: max_dim = dims[0] if dims[0] > dims[1] else dims[1] - return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim))) + return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim)), device=self.device) (level, is_exact_level) = self.curated_best_level_for_downsample(target_sampling_factor) # check if to get the existing level if is_exact_level: backend: Backend = self.read_region((x, y), level, dims) - return self.__class__.backend_to_array(self.backend_rgba2rgb(backend)) \ - if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend) + return self.__class__.backend_to_array(self.backend_rgba2rgb(backend), device=self.device) \ + if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend, device=self.device) # scale down the thumb img from the next high level else: return self.resize_tile_downward(target_sampling_factor, level, win_size=2048, **read_region_kwargs) @@ -311,13 +317,14 @@ def parse_wsi_handles(handle_list: str | List[str], delimiter: str, @classmethod def __create_handle(cls, fname: str, - handle_class_list: List[Callable[[str], "WSIImageHandle"]]) -> "WSIImageHandle": + handle_class_list: List[Callable[[str, Optional[int]], "WSIImageHandle"]], + device_id: Optional[int]) -> "WSIImageHandle": image_handle = None assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file" for handle_class in handle_class_list: # noinspection PyBroadException try: - image_handle = handle_class(fname) + image_handle = handle_class(fname, device_id) break except Exception: # current wsi handle class doesn't support this file @@ -333,16 +340,18 @@ def __create_handle(cls, fname: str, @classmethod @final - def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle": + def build_handle(cls, fname: str, handles: str, device_id: Optional[int]) -> "WSIImageHandle": # get handles list module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES) handle_class_list = dynamic_import(module_list, attr_list, return_first=False) - image_handle = cls.__create_handle(fname, handle_class_list) + image_handle = cls.__create_handle(fname, handle_class_list, device_id) return image_handle - def __init__(self, fname: str): + def __init__(self, fname: str, device_id: Optional[int]): self.fname = fname - self._adapter = ArrayAdapter.build(input_device=self.device, output_device=self.device) + self._device = Device(self.device_type, device_id) + self._adapter = ArrayAdapter.build(input_device=self._device, output_device=self._device, + contingent_device=self._device) @abstractmethod def close_handle(self): @@ -357,9 +366,13 @@ def is_closed(self): @property @abstractmethod - def device(self) -> ArrayDevice: + def device_type(self) -> ArrayDeviceType: raise NotImplementedError @property def adapter(self) -> ArrayAdapter: return self._adapter + + @abstractmethod + def release(self): + ... diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py index df23ee3..d9c6a70 100644 --- a/histoqc/wsi_handles/cuimage_handle.py +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -1,43 +1,61 @@ from __future__ import annotations + +import skimage.util from PIL.Image import Image as PILImage from cucim.clara import CuImage from .base import WSIImageHandle +import traceback from PIL import Image from ..import_wrapper.openslide import openslide import cupy as cp -from typing import List, Union, Tuple, Mapping +from typing import List, Union, Tuple, Mapping, Optional from typing import cast from lazy_property import LazyProperty import numpy as np from cucim import skimage as c_skimage -from histoqc.array_adapter import ArrayDevice -import gc +from histoqc.array_adapter import ArrayDeviceType, Device +from types import MappingProxyType +import logging + + +DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA) class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): - handle: CuImage + handle: Optional[CuImage] fname: str + _associated_images: Optional[Mapping] # TODO: standalone parser of vendor information - dummy_handle: openslide.OpenSlide + dummy_handle: Optional[openslide.OpenSlide] def backend_rgba2rgb(self, img: CuImage) -> CuImage: # todo: it appears that CuImage does not take care of the alpha channel at all. return img @classmethod - def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int]) -> cp.ndarray: - w, h, *_ = new_size_wh - arr = cp.array(data) - return c_skimage.transform.resize(arr, output_shape=(h, w)) + def validate_device(cls, device: Optional[Device]): + device = DEFAULT_DEVICE if device is None else device + assert isinstance(device, Device) + return device - def __init__(self, fname: str): - super().__init__(fname) + @classmethod + def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device: Optional[Device]) -> cp.ndarray: + device = cls.validate_device(device) + with cp.cuda.Device(device.device_id): + w, h, *_ = new_size_wh + arr = cp.array(data) + return c_skimage.transform.resize(arr, output_shape=(h, w)) + + def __init__(self, fname: str, device_id: Optional[int]): + super().__init__(fname, device_id) + self._associated_images = None self.handle = CuImage(fname) # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a # todo - native unified metadata interface for different vendors. self.dummy_handle = openslide.OpenSlide(fname) + logging.debug(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}") @LazyProperty def background_color(self): @@ -107,18 +125,23 @@ def get_thumbnail(self, new_dim): """ # from openslide - downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim))) - level = self.get_best_level_for_downsample(downsample) - thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level])) - # resize - thumb_cp = cp.array(thumb, copy=False) - target_w, target_h = (x // int(downsample) for x in self.dimensions) - aspect_ratio = self.dimensions[0] / self.dimensions[1] - - target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) - resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w)) - - return c_skimage.util.img_as_ubyte(resized) + with cp.cuda.Device(self.device.device_id): + downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim))) + level = self.get_best_level_for_downsample(downsample) + target_w, target_h = (x // int(downsample) for x in self.dimensions) + aspect_ratio = self.dimensions[0] / self.dimensions[1] + # resize + thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level])) + try: + thumb_cp = cp.array(thumb, copy=True) + target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) + resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), clip=True) + return c_skimage.util.img_as_ubyte(resized) + except Exception: + logging.warning(f"{__name__} - {self.fname}: OOM on {self.device.device_id}. {traceback.format_exc()}") + thumb_np = np.array(thumb, copy=True) + thumb_np = skimage.util.img_as_ubyte(thumb_np) + return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB") def get_best_level_for_downsample(self, down_factor: float) -> int: """Return the largest level that's smaller than the target downsample factor, consistent with openslide. @@ -134,14 +157,19 @@ def get_best_level_for_downsample(self, down_factor: float) -> int: down_indices = np.where(level_downsamples_arr <= down_factor)[0] down_values = level_downsamples_arr[down_indices] # find the indices of the down_indices that points to the best downsample factor value - return down_indices[down_values.argmax()] + return cast(int, down_indices[down_values.argmax()]) - def region_backend(self, location, level, size, **kwargs): - return self.handle.read_region(location=location, level=level, size=size, **kwargs) + def region_backend(self, location, level, size, **kwargs) -> CuImage: + with cp.cuda.Device(self.device.device_id): + return self.handle.read_region(location=location, level=level, size=size, **kwargs) @staticmethod - def backend_to_array(region: Union[CuImage, cp.ndarray]) -> cp.ndarray: - return cp.array(region, copy=False) + def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray: + device = CuImageHandle.validate_device(device) + with cp.cuda.Device(device.device_id): + result = cp.array(region, copy=False) + logging.debug(f"{__name__} - {device.device_id} == {result.device}") + return result @staticmethod def array_to_numpy(arr: cp.ndarray) -> np.ndarray: @@ -149,7 +177,7 @@ def array_to_numpy(arr: cp.ndarray) -> np.ndarray: @classmethod def backend_to_pil(cls, region: CuImage) -> PILImage: - return Image.fromarray(cls.backend_to_array(region).get()) + return Image.fromarray(cls.backend_to_array(region, None).get()) def read_label(self) -> CuImage: return self.handle.associated_image("label") @@ -157,14 +185,28 @@ def read_label(self) -> CuImage: def read_macro(self) -> CuImage: return self.handle.associated_image("macro") - @LazyProperty + @classmethod + def new_associated_images(cls, handle: CuImage) -> Mapping: + if handle is None or not hasattr(handle, "associated_images"): + return MappingProxyType(dict()) + keys = handle.associated_images + return MappingProxyType({k: handle.associated_image(k) for k in keys}) + + @property def associated_images(self) -> Mapping: - keys = self.handle.associated_images - return {k: self.handle.associated_image(k) for k in keys} + handle = getattr(self, "handle", None) + if not hasattr(self, "_associated_images") or self._associated_images is None: + self._associated_images = self.__class__.new_associated_images(handle) + return self._associated_images + + def clear_associated_images(self): + self._associated_images = None @staticmethod - def grid_stack(grid: List[List[cp.ndarray]]): - return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1) + def grid_stack(grid: List[List[cp.ndarray]], device: Optional[Device]): + device = CuImageHandle.validate_device(device) + with cp.cuda.Device(device.device_id): + return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1) @staticmethod def backend_dim(region: CuImage) -> Tuple[int, int]: @@ -175,6 +217,8 @@ def array_shape(arr: cp.ndarray) -> Tuple[int, ...]: return arr.shape def release(self): + # todo - what's the better practice? This forces to free everything and can lock up the kernel for a couple + # todo - of seconds cp.get_default_memory_pool().free_all_blocks() cp.get_default_pinned_memory_pool().free_all_blocks() @@ -183,12 +227,13 @@ def close_handle(self): self.handle.close() del self.handle self.handle = None - gc.collect() if self.dummy_handle is not None: self.dummy_handle.close() self.dummy_handle = None + # clear the cached map + self.clear_associated_images() self.release() @property - def device(self) -> ArrayDevice: - return ArrayDevice.CUDA + def device_type(self) -> ArrayDeviceType: + return ArrayDeviceType.CUDA diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py index c426cf2..1c28c33 100644 --- a/histoqc/wsi_handles/openslide_handle.py +++ b/histoqc/wsi_handles/openslide_handle.py @@ -3,12 +3,12 @@ from .base import WSIImageHandle from histoqc.import_wrapper.openslide import openslide -from typing import Union, Tuple, Sequence, List, Mapping -from typing import cast +from typing import Union, Tuple, Sequence, List, Mapping, cast, Optional from PIL.Image import Image as PILImage from .utils import rgba2rgb_pil from PIL import Image -from histoqc.array_adapter import ArrayDevice +from histoqc.array_adapter import ArrayDeviceType, Device +import logging class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]): @@ -16,17 +16,25 @@ class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]) _magnification_factor: str _has_bounding_box: bool fname: str - handle: openslide.OpenSlide + handle: Optional[openslide.OpenSlide] + + @classmethod + def sanitize_device(cls, device: Optional[Device]): + if device is None: + return + if device.device_type is not ArrayDeviceType.CPU: + logging.warning(f"Expect CPU but got {device.device_type}. No effect." + f"Check upstream device settings") def backend_rgba2rgb(self, img) -> PILImage: return rgba2rgb_pil(img, self.background_color) - def __init__(self, fname): - super().__init__(fname) + def __init__(self, fname: str, device_id: Optional[int] = None): + super().__init__(fname, device_id) self.handle = openslide.OpenSlide(fname) self._has_bounding_box = True self._bounding_box = self.__get_bounding_box() - + # get magnification factor from wsi slide self._magnification_factor = self.handle.properties.get("openslide.objective-power") or \ self.handle.properties.get("aperio.AppMag") @@ -97,7 +105,8 @@ def comment(self) -> str: return self.handle.properties.get("openslide.comment", "NA") @classmethod - def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int]): + def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int], device: Optional[Device]): + cls.sanitize_device(device) return np.array(Image.fromarray(data).resize(new_size_wh), copy=False) def get_thumbnail(self, new_dim): @@ -116,7 +125,8 @@ def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage: return region @staticmethod - def backend_to_array(region: PILImage) -> np.ndarray: + def backend_to_array(region: PILImage, device: Optional[Device]) -> np.ndarray: + OpenSlideHandle.sanitize_device(device) return np.array(region) @staticmethod @@ -134,7 +144,8 @@ def associated_images(self) -> Mapping[str, PILImage]: return self.handle.associated_images @staticmethod - def grid_stack(grid: List[List[np.ndarray]]): + def grid_stack(grid: List[List[np.ndarray]], device: Optional[Device]) -> np.ndarray: + OpenSlideHandle.sanitize_device(device) return np.concatenate([np.concatenate(row, axis=0) for row in grid], axis=1) @staticmethod @@ -151,5 +162,8 @@ def close_handle(self): self.handle = None @property - def device(self) -> ArrayDevice: - return ArrayDevice.CPU + def device_type(self) -> ArrayDeviceType: + return ArrayDeviceType.CPU + + def release(self): + ... From f02d76bd42cfd3fe4a71634b57070c2851388213 Mon Sep 17 00:00:00 2001 From: CielAl Date: Thu, 9 May 2024 08:42:57 -0400 Subject: [PATCH 5/6] Introduce Dask and Dask-GPU for CPU/GPU distributed/multiprocessing tasks. --- histoqc/BlurDetectionModule.py | 11 +- histoqc/ClassificationModule.py | 26 ++- histoqc/LightDarkModule.py | 6 +- histoqc/__main__.py | 163 +++++++++-------- histoqc/_log_conf.py | 205 +++++++++++++++++++++ histoqc/_logging.py | 226 ++++++++++++++++++++++++ histoqc/_pipeline.py | 84 +-------- histoqc/_worker.py | 76 ++++---- histoqc/array_adapter/adapter.py | 31 ++-- histoqc/import_wrapper/cupy_extra.py | 11 +- histoqc/import_wrapper/dask_cuda.py | 12 ++ histoqc/wsi_handles/base.py | 2 +- histoqc/wsi_handles/cuimage_handle.py | 95 ++++++---- histoqc/wsi_handles/openslide_handle.py | 3 +- requirements.txt | 1 + setup.py | 2 +- 16 files changed, 706 insertions(+), 248 deletions(-) create mode 100644 histoqc/_log_conf.py create mode 100644 histoqc/_logging.py create mode 100644 histoqc/import_wrapper/dask_cuda.py diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py index 176a132..4e173c6 100644 --- a/histoqc/BlurDetectionModule.py +++ b/histoqc/BlurDetectionModule.py @@ -18,12 +18,19 @@ def identifyBlurryRegions(s: BaseImage, params): adapter = s.image_handle.adapter blur_radius = int(params.get("blur_radius", 7)) blur_threshold = float(params.get("blur_threshold", .1)) - img = s.getImgThumb(params.get("image_work_size", "2.5x")) - img = adapter(rgb2gray)(img) + img_thumb = s.getImgThumb(params.get("image_work_size", "2.5x")) + + img = adapter(rgb2gray)(img_thumb) # use the __abs__ interface + logging.debug(f"{s['filename']} - \tidentifyBlurryRegions Gray:" + f" {img.max(), img.min(), blur_radius, blur_threshold}") + img_laplace = abs(adapter(skimage.filters.laplace)(img)) + + logging.debug(f"{s['filename']} - \tidentifyBlurryRegions img_laplace: {img_laplace.max(), img_laplace.min()}") mask = adapter(skimage.filters.gaussian)(img_laplace, sigma=blur_radius) <= blur_threshold + # for some reason resize takes a grayscale and produces a 3chan # Note: the reason you obtain a 3chan is that you specified a 3chan output shape mask_resized_shape = s.getImgThumb(s["image_work_size"]).shape[:2] diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 6c7f81f..5096bc9 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -4,7 +4,7 @@ import sys from histoqc.array_adapter import ArrayAdapter, Device from ast import literal_eval as make_tuple - +import traceback from distutils.util import strtobool from histoqc.BaseImage import printMaskHelper, BaseImage @@ -81,9 +81,15 @@ def compute_rgb(img, params): def compute_laplace(img, params): laplace_ksize = int(params.get("laplace_ksize", 3)) adapter = params["adapter"] + logging.debug(f"{__name__} - NaN check laplace img: {np.isnan(img).any()}") + # adapter.imsave(f"~/dbg/{params['filename']}_img_laplace.png", img) img_gray = adapter(rgb2gray)(img) + # adapter.imsave(f"~/dbg/{params['filename']}_gray_laplace.png", adapter(img_as_ubyte)(img_gray)) # return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] - return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None] + logging.debug(f"{__name__} - NaN check laplace gray: {np.isnan(img_gray).any()}, {type(img)}") + feat = adapter(laplace)(img_gray, ksize=laplace_ksize) + logging.debug(f"{__name__} - NaN check laplace feat: {np.isnan(feat).any()}") + return feat[:, :, None] def compute_lbp(img, params): @@ -157,11 +163,16 @@ def compute_frangi(img, params): sigmas = frangi_scale_range + (frangi_scale_step,) adapter: ArrayAdapter = params["adapter"] + # adapter.imsave(f"~/dbg/{params['filename']}_img_frangi.png", img) + logging.debug(f"{__name__} - NaN check frangi img: {np.isnan(img).any()}") img_gray = adapter(rgb2gray)(img) + # adapter.imsave(f"~/dbg/{params['filename']}_gray_frangi.png", adapter(img_as_ubyte)(img_gray)) + logging.debug(f"{__name__} - NaN check frangi gray: {np.isnan(img_gray).any()}") feat = adapter(frangi)(img_gray, sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, black_ridges=frangi_black_ridges) # feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, # black_ridges=frangi_black_ridges) + logging.debug(f"{__name__} - NaN check frangi feat: {np.isnan(feat).any()}") return feat[:, :, None] # add singleton dimension @@ -195,6 +206,7 @@ def byExampleWithFeatures(s: BaseImage, params): adapter = s.image_handle.adapter params['adapter'] = adapter + params['filename'] = s['filename'] with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model # then it is shared with all other modules if not params["shared_dict"].get("model_" + name, False): @@ -239,6 +251,9 @@ def byExampleWithFeatures(s: BaseImage, params): # do stuff here with model_vals model_vals = np.vstack(model_vals) clf = RandomForestClassifier(n_jobs=-1) + # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}") + model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels, + device=Device.build(Device.DEVICE_CPU), copy=True) adapter(clf.fit)(model_vals, y=model_labels.ravel()) # clf.fit(model_vals, y=model_labels.ravel()) params["shared_dict"]["model_" + name] = clf @@ -247,7 +262,12 @@ def byExampleWithFeatures(s: BaseImage, params): clf = params["shared_dict"]["model_" + name] img = s.getImgThumb(s["image_work_size"]) feats = compute_features(img, params) - cal = adapter(clf.predict_proba)(feats.reshape(-1, feats.shape[2])) + logging.debug(f"{__name__} - {s['filename']} - NaN check img: {np.isnan(img).any()}") + logging.debug(f"{__name__} - {s['filename']} - NaN check feats: {np.isnan(feats).any()}") + + feats = feats.reshape(-1, feats.shape[2]) + feats = adapter.curate_arrays_device(feats, device=Device.build(Device.DEVICE_CPU), copy=True) + cal = adapter(clf.predict_proba)(feats) cal = cal.reshape(img.shape[0], img.shape[1], 2) mask = cal[:, :, 1] > thresh diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py index 2312089..fc9f5c4 100644 --- a/histoqc/LightDarkModule.py +++ b/histoqc/LightDarkModule.py @@ -68,11 +68,11 @@ def getIntensityThresholdPercent(s: BaseImage, params): img = s.getImgThumb(s["image_work_size"]) img_std = img.std(axis=2) - map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std) + map_std = adapter.and_(img_std > lower_std, img_std < upper_std) img = adapter(color.rgb2gray)(img) - region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh) - region_between_interval = np.bitwise_and(region_between_interval, map_std) + region_between_interval = adapter.and_(img > lower_thresh, img < upper_thresh) + region_between_interval = adapter.and_(region_between_interval, map_std) s["img_mask_" + name] = region_between_interval > 0 diff --git a/histoqc/__main__.py b/histoqc/__main__.py index 8462cc1..b8ec752 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -7,26 +7,29 @@ import os import sys import time -from functools import partial +# from functools import partial from typing import Tuple, Optional, List +import dask.distributed + from histoqc._pipeline import BatchedResultFile -from histoqc._pipeline import MultiProcessingLogManager +# from histoqc._pipeline import MultiProcessingLogManager +from copy import deepcopy +from histoqc._logging import LoggingSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, HDL_OUT_FIELD from histoqc._pipeline import load_pipeline from histoqc._pipeline import log_pipeline from histoqc._pipeline import move_logging_file_handler -from histoqc._pipeline import setup_logging +# from histoqc._pipeline import setup_logging from histoqc._pipeline import setup_plotting_backend -from histoqc._worker import worker -from histoqc._worker import worker_setup -from histoqc._worker import worker_success -from histoqc._worker import worker_error -from histoqc._worker import PARAM_SHARE, device_assign, KEY_ASSIGN +from histoqc._worker import (worker, worker_setup, worker_success, worker_error, worker_single_process, + PARAM_SHARE, KEY_ASSIGN) + from histoqc.config import read_config_template from histoqc.data import managed_pkg_data from histoqc.wsi_handles.constants import KEY_CUCIM -from histoqc.array_adapter.adapter import cupy_installed -from histoqc.import_wrapper.cupy_extra import cp +from histoqc.import_wrapper.cupy_extra import cp, cupy_installed +from histoqc.import_wrapper.dask_cuda import dask_cuda_installed, dask_cuda +from dask.distributed import Client, as_completed def parse_config(args: argparse.Namespace) -> Tuple[configparser.ConfigParser, Optional[str]]: @@ -48,15 +51,15 @@ def _get_device_list(n_proc: int): def parse_multiprocessing(args: argparse.Namespace, - config: configparser.ConfigParser) -> Tuple[argparse.Namespace, List[int]]: + config: configparser.ConfigParser) -> Tuple[argparse.Namespace, bool]: # List[int], is_multiproc = args.nprocesses >= 0 is_cuda = KEY_CUCIM in config["BaseImage.BaseImage"].get("handles", "") # if use cuda but without installation of dependencies - return - if not is_cuda or not cupy_installed(): - return args, _get_device_list(args.nprocesses) + if not is_cuda or not (cupy_installed() and dask_cuda_installed()): + return args, False # _get_device_list(args.nprocesses), # guard - assert is_cuda and cp is not None, f"Enable CUDA but cupy is not installed" + assert is_cuda and cp is not None and dask_cuda is not None, f"Enable CUDA but Dep is not installed" # set spawn if is_multiproc: multiprocessing.set_start_method("spawn", force=True) @@ -68,8 +71,16 @@ def parse_multiprocessing(args: argparse.Namespace, f"{args.nprocesses} > {num_devices}. Cutoff the number of processes to {num_devices}") args.nprocesses = min(args.nprocesses, num_devices) # device list --> if - device_list = _get_device_list(args.nprocesses) - return args, device_list + # device_list = _get_device_list(args.nprocesses) + return args, is_cuda + + +def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]]): + assert nprocesses > 0, f"Expect number of processes > 0 to launch the cluster. Get {nprocesses}" + if not is_cuda: + return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, ) + assert cp is not None and dask_cuda is not None + return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, n_workers=nprocesses) @managed_pkg_data @@ -111,21 +122,35 @@ def main(argv=None): parser.add_argument('--symlink', metavar="TARGET_DIR", help="create symlink to outdir in TARGET_DIR", default=None) + parser.add_argument('--gpu_ids', + type=int, + nargs='+', + help="GPU Devices to use (None=use all). Default: None", + default=None) args = parser.parse_args(argv) # --- multiprocessing and logging setup ----------------------------------- - # todo: move config parsing above the mpm initialization and set the start method accordingly - # multiprocessing.set_start_method("spawn") - - setup_logging(capture_warnings=True, filter_warnings='ignore') # --- parse the pipeline configuration ------------------------------------ config, conf_warn_msg = parse_config(args) - args, device_list = parse_multiprocessing(args, config) - mpm = multiprocessing.Manager() - lm = MultiProcessingLogManager('histoqc', manager=mpm) + args, is_cuda = parse_multiprocessing(args, config) + + # --- create output directory and move log -------------------------------- + args.outdir = os.path.expanduser(args.outdir) + os.makedirs(args.outdir, exist_ok=True) + # move_logging_file_handler(logging.getLogger(), args.outdir) + + logging_setup = LoggingSetup(deepcopy(MAIN_CONF_BUILD), + deepcopy(WORKER_CONF_BUILD), + capture_warnings=True, filter_warnings='ignore') + # setup main proc logger config and file handler. + logging_setup.setup_main_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN, + handler_name=HDL_FILE, out_field=HDL_OUT_FIELD) + + # Inherit from the root logger. + main_logger = logging.getLogger(__name__) if conf_warn_msg: - lm.logger.warning(conf_warn_msg) + main_logger.warning(conf_warn_msg) # --- provide models, pen and templates as fallbacks from package data ---- @@ -133,27 +158,23 @@ def main(argv=None): # --- load the process queue (error early) -------------------------------- - _steps = log_pipeline(config, log_manager=lm) + _steps = log_pipeline(config, logger=main_logger) process_queue = load_pipeline(config) # --- check symlink target ------------------------------------------------ - if args.symlink is not None: if not os.path.isdir(args.symlink): - lm.logger.error("error: --symlink {args.symlink} is not a directory") + main_logger.error("error: --symlink {args.symlink} is not a directory") return -1 - # --- create output directory and move log -------------------------------- - args.outdir = os.path.expanduser(args.outdir) - os.makedirs(args.outdir, exist_ok=True) - move_logging_file_handler(logging.getLogger(), args.outdir) - if BatchedResultFile.results_in_path(args.outdir): if args.force: - lm.logger.info("Previous run detected....overwriting (--force set)") + main_logger.info("Previous run detected....overwriting (--force set)") else: - lm.logger.info("Previous run detected....skipping completed (--force not set)") - + main_logger.info("Previous run detected....skipping completed (--force not set)") + # for writing the results after workers succeed. + mpm = multiprocessing.Manager() + # results only utilize the lock and sync list from mpm. mpm is not saved. results = BatchedResultFile(args.outdir, manager=mpm, batch_size=args.batch, @@ -189,9 +210,9 @@ def main(argv=None): pth = os.path.join(args.basepath, args.input_pattern[0]) files = glob.glob(pth, recursive=True) - lm.logger.info("-" * 80) + main_logger.info("-" * 80) num_files = len(files) - lm.logger.info(f"Number of files detected by pattern:\t{num_files}") + main_logger.info(f"Number of files detected by pattern:\t{num_files}") # --- start worker processes ---------------------------------------------- @@ -199,66 +220,52 @@ def main(argv=None): 'process_queue': process_queue, 'config': config, 'outdir': args.outdir, - 'log_manager': lm, - 'lock': mpm.Lock(), - PARAM_SHARE: mpm.dict(), + 'lock': mpm.Lock(), # todo transit to Dask's Lock + PARAM_SHARE: mpm.dict(), # todo transit to Dask's Variable 'num_files': num_files, 'force': args.force, } # init the dict of device assignment _shared_state[PARAM_SHARE][KEY_ASSIGN] = mpm.dict() failed = mpm.list() - setup_plotting_backend(lm.logger) + setup_plotting_backend(main_logger) try: - # todo: for cuda --> must use spawn method if CUDA is enabled. - # todo: however a better memory management scheme should be tested. - # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime - # todo: at this moment we simply override the args.nprocesses if args.nprocesses > 0: + local_cluster = new_cluster('histoqc', is_cuda=is_cuda, + nprocesses=args.nprocesses, gpu_ids=args.gpu_ids) + with Client(local_cluster) as client: + # register the worker side + logging_setup.setup_client(client, forward_name="root") - with lm.logger_thread(): - with multiprocessing.Pool(processes=args.nprocesses, - initializer=worker_setup, - initargs=(config, device_list, _shared_state)) as pool: - try: - for idx, file_name in enumerate(files): - _ = pool.apply_async( - func=worker, - args=(idx, file_name), - kwds=_shared_state, - callback=partial(worker_success, result_file=results), - error_callback=partial(worker_error, failed=failed), - ) - - finally: - pool.close() - pool.join() + # noinspection PyTypeChecker + futures_list = [client.submit(worker, idx, file_name, **_shared_state) + for idx, file_name in enumerate(files)] + for future in as_completed(futures_list): + try: + base_img_finished = future.result() + except Exception as exc: + worker_error(exc, failed) + else: + worker_success(base_img_finished, result_file=results) else: - for idx, file_name in enumerate(files): - try: - device_assign(device_list, _shared_state[PARAM_SHARE]) - _success = worker(idx, file_name, **_shared_state) - except Exception as exc: - worker_error(exc, failed) - continue - else: - worker_success(_success, results) + worker_setup() + worker_single_process(files, failed, results, **_shared_state) except KeyboardInterrupt: - lm.logger.info("-----REQUESTED-ABORT-----\n") + main_logger.info("-----REQUESTED-ABORT-----\n") else: - lm.logger.info("----------Done-----------\n") + main_logger.info("----------Done-----------\n") finally: - lm.logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log)," - " warnings are listed in warnings column in output") + main_logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log)," + " warnings are listed in warnings column in output") for file_name, error, tb in failed: - lm.logger.info(f"{file_name}\t{error}\n{tb}") + main_logger.info(f"{file_name}\t{error}\n{tb}") if args.symlink is not None: origin = os.path.realpath(args.outdir) @@ -268,9 +275,9 @@ def main(argv=None): ) try: os.symlink(origin, target, target_is_directory=True) - lm.logger.info("Symlink to output directory created") + main_logger.info("Symlink to output directory created") except (FileExistsError, FileNotFoundError): - lm.logger.error( + main_logger.error( f"Error creating symlink to output in '{args.symlink}', " f"Please create manually: ln -s {origin} {target}" ) diff --git a/histoqc/_log_conf.py b/histoqc/_log_conf.py new file mode 100644 index 0000000..ae384b8 --- /dev/null +++ b/histoqc/_log_conf.py @@ -0,0 +1,205 @@ +from typing_extensions import TypedDict, NotRequired, Literal, Self +from typing import Dict, Type, Callable, Any, List, Optional, Sequence, get_args, cast +import logging.config + +FormattersDict = TypedDict('FormattersDict', { + 'format': str, + 'datefmt': str, + 'style': str, + 'validate': str, + 'defaults': str, + 'class': NotRequired[str], + +}, total=False) + +FilterDict = TypedDict("FilterDict", { + '()': Type[logging.Filter] | Callable, + 'args': NotRequired[Any], +}) + +HandlerDict = TypedDict("HandlerDict", { + "class": str, + "level": NotRequired[str], + "formatter": NotRequired[str], + "filters": NotRequired[List[str | logging.Filter]], + "filename": NotRequired[str], + "stream": NotRequired[str], + "mode": NotRequired[str], +}) + + +class LoggerDict(TypedDict, total=False): + level: str + propagate: bool + filters: List[str | logging.Filter] + handlers: List[str] + + +class ConfigDict(TypedDict): + + version: int + formatters: NotRequired[Dict[str, FormattersDict]] + filters: NotRequired[Dict[str, FilterDict]] + handlers: NotRequired[Dict[str, HandlerDict]] + loggers: NotRequired[Dict[str, LoggerDict]] + root: NotRequired[LoggerDict] + incremental: NotRequired[bool] # default False + disable_existing_loggers: NotRequired[bool] # default True + + +DEFAULT_CONSOLE = Literal['console'] +DEFAULT_FILE = Literal['file'] +TYPE_PREDEFINED_HANDLER = Literal[DEFAULT_CONSOLE, DEFAULT_FILE] + +DEFAULT_STD_OUT: str = 'ext://sys.stdout' +DEFAULT_LOG_FN: str = 'error.log' +DEFAULT_HANDLER_OUT_MAP: Dict[TYPE_PREDEFINED_HANDLER, str] = { + get_args(DEFAULT_CONSOLE)[0]: DEFAULT_STD_OUT, + get_args(DEFAULT_FILE)[0]: DEFAULT_LOG_FN, +} + +TYPE_FORMAT_DEFAULT = Literal['default'] +TYPE_FORMAT_SIMPLE = Literal['simple'] +TYPE_PREDEFINED_FORMATTER = Literal[TYPE_FORMAT_DEFAULT, TYPE_FORMAT_SIMPLE] + +PREDEFINED_FORMATTER_MAP: Dict[TYPE_PREDEFINED_FORMATTER, FormattersDict] = { + get_args(TYPE_FORMAT_DEFAULT)[0]: { + 'class': 'logging.Formatter', + 'format': '%(asctime)s - %(levelname)s - %(message)s' + }, + get_args(TYPE_FORMAT_SIMPLE)[0]: { + 'class': 'logging.Formatter', + 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + } +} + +LEVEL_DEBUG = Literal['DEBUG'] +LEVEL_INFO = Literal['INFO'] +LEVEL_WARNING = Literal['WARNING'] +LEVEL_ERROR = Literal['ERROR'] +LEVEL_CRITICAL = Literal['CRITICAL'] +LEVEL_NOTSET = Literal['NOTSET'] +LEVEL_TYPE = Literal[LEVEL_NOTSET, LEVEL_CRITICAL, LEVEL_ERROR, LEVEL_WARNING, LEVEL_INFO, LEVEL_DEBUG] + + +def default_init_helper(d: Dict, key: str, factory: Callable) -> Any: + if key in d: + return + d[key] = factory() + + +def init(attr: str, key: str, factory: Callable) -> Callable: + def decorator(func): + def wrapped(self, *args, **kwargs): + # Execute the helper function with self.some_attr, b, c + default_init_helper(getattr(self, attr), key, factory) + # Then execute the original function + return func(self, *args, **kwargs) + return wrapped + return decorator + + +class LoggerConfigBuilder: + """Builder to create log conf schema. + + Examples: + config_builder = LoggerConfigBuilder(version=1) + config = (config_builder + .add_formatter_by_type(formatter_type='simple') + .add_handler_by_type(handler_type='console', level='DEBUG', formatter_type='simple') + .add_handler_by_type(handler_type='logfile', level='ERROR', formatter_type='simple') + .add_logger(name='myapp', level='DEBUG', handlers=['console', 'logfile']) + .config) + """ + + def __init__(self, version: int): + self._config = ConfigDict(version=version, ) + + @staticmethod + def get_predefined_formatter(formatter_type) -> FormattersDict: + if formatter_type not in PREDEFINED_FORMATTER_MAP: + raise ValueError(f'Unknown predefined formatter: {formatter_type}') + return PREDEFINED_FORMATTER_MAP[formatter_type] + + @staticmethod + def get_predefined_handler(handler_type: TYPE_PREDEFINED_HANDLER, + level: LEVEL_TYPE = 'DEBUG', + formatter: str = 'simple', + target: Optional[str] = None, + mode: str = 'w') -> HandlerDict: + target = target if target is not None else DEFAULT_HANDLER_OUT_MAP[handler_type] + if handler_type == get_args(DEFAULT_CONSOLE)[0]: + return { + 'class': 'logging.StreamHandler', + 'level': level, + 'formatter': formatter, + 'stream': target + } + elif handler_type == get_args(DEFAULT_FILE)[0]: + return { + 'class': 'logging.FileHandler', + 'level': level, + 'formatter': formatter, + 'filename': 'app.log', + 'mode': mode, + } + raise ValueError(f'Unknown pre-defined handler type: {handler_type}') + + @staticmethod + def create_logger(*, level: LEVEL_TYPE = 'DEBUG', handlers: List[str] = ('console', ), + propagate: bool = False, + filters: Optional[List[str | logging.Filter]] = None) -> LoggerDict: + if filters is None: + filters = [] + return LoggerDict(level=level, handlers=handlers, propagate=propagate, filters=filters) + + @init("config", 'formatters', dict) + def add_formatter_by_type(self, *, formatter_type: TYPE_PREDEFINED_FORMATTER): + # default_init(self.config, 'formatters', dict) + self._config['formatters'][formatter_type] = self.get_predefined_formatter(formatter_type) + return self + + @init("config", 'formatters', dict) + def add_formatter(self, *, name: str, formatter_dict: FormattersDict): + # default_init(self.config, 'formatters', dict) + self._config['formatters'][name] = formatter_dict + return self + + @init("config", 'handlers', dict) + def add_handler(self, *, name: str, handler_dict: HandlerDict): + self._config['handlers'][name] = handler_dict + return self + + @init("config", 'handlers', dict) + def add_handler_by_type(self, *, handler_type: TYPE_PREDEFINED_HANDLER, level: LEVEL_TYPE, + formatter: str): + self._config['handlers'][handler_type] = self.get_predefined_handler(handler_type, level, formatter) + return self + + @init("config", 'loggers', dict) + def add_logger(self, *, name: str, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False, + filters: Optional[Sequence[str | logging.Filter]] = None) -> Self: + self._config['loggers'][name] = self.create_logger(level=level, handlers=handlers, + propagate=propagate, + filters=filters) + return self + + @init("config", 'root', dict) + def add_root(self, *, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False, + filters: Optional[Sequence[str | logging.Filter]] = None) -> Self: + self._config['root'] = self.create_logger(level=level, handlers=handlers, + propagate=propagate, + filters=filters) + return self + + @init("config", 'handlers', dict) + def set_handler_target(self, handler_name: str, out_field: str, out_value: str): + assert handler_name in self.config['handlers'] + handler_dict: HandlerDict = self.config['handlers'][handler_name] + out_field = cast(Literal['filename'], out_field) + handler_dict[out_field] = out_value + return self + + @property + def config(self): + return self._config diff --git a/histoqc/_logging.py b/histoqc/_logging.py new file mode 100644 index 0000000..7bf7a8f --- /dev/null +++ b/histoqc/_logging.py @@ -0,0 +1,226 @@ +from __future__ import annotations +import multiprocessing +import dask +import threading +import os +from contextlib import contextmanager +from logging.handlers import QueueHandler +from typing import Dict, cast, Optional, List +from typing_extensions import Literal, Self +import warnings +from dask.distributed import WorkerPlugin +import logging +from logging.config import dictConfig +from dask.distributed import get_worker +from histoqc._log_conf import LoggerConfigBuilder +DEFAULT_LOG_FN = 'error.log' +FMT_DFT = cast(Literal['default'], 'default') +HDL_CONSOLE = 'console' +HDL_FILE = 'file' +HDL_OUT_FIELD = 'filename' +MAIN_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT) + .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT) + .add_handler_by_type(handler_type=HDL_FILE, level='WARNING', formatter=FMT_DFT) + .add_root(level="INFO", handlers=[HDL_CONSOLE, HDL_FILE]) + .set_handler_target(handler_name=HDL_FILE, out_field=HDL_OUT_FIELD, out_value=DEFAULT_LOG_FN)) + +WORKER_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT) + .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT) + .add_root(level="INFO", handlers=[HDL_CONSOLE])) + + +def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore'): + # configure warnings too... + filter_type = Literal["default", "error", "ignore", "always", "module", "once"] + warnings.filterwarnings(cast(filter_type, filter_warnings)) + logging.captureWarnings(capture_warnings) + + +class DaskLogHandler(logging.Handler): + """Custom Handler, which emits the topic/message from builtin logging.Logger to dask's centralized logging. + + Could be useful for future if we need to migrate from builtin logger to dask's logger + """ + topic: str + + def emit(self, record): + worker = get_worker() + log_entry = self.format(record) + worker.log_event(self.topic, log_entry) + + def __init__(self, topic: str = 'logs'): + super().__init__() + self.topic = topic + + +class WorkerInitializer(WorkerPlugin): + """Worker Initializer + + """ + worker_config: Dict + logger_names: List[Optional[str]] + + def __init__(self, worker_config: Dict, capture_warnings: bool = True, filter_warnings: str = 'ignore'): + self.worker_config = worker_config + self.capture_warnings = capture_warnings + self.filter_warnings = filter_warnings + + def setup(self, worker): + logging.config.dictConfig(self.worker_config) + handle_warning(capture_warnings=self.capture_warnings, filter_warnings=self.filter_warnings) + + @classmethod + def build(cls, worker_config: Dict, + capture_warnings: bool = True, filter_warnings: str = 'ignore') -> Self: + return cls(worker_config, capture_warnings, filter_warnings) + + +class LoggingSetup: + _plugin: WorkerPlugin + _main_build: LoggerConfigBuilder + _worker_build_list = List[LoggerConfigBuilder] + + def __init__(self, main_build: LoggerConfigBuilder, + *worker_builds: LoggerConfigBuilder, + capture_warnings: bool = True, filter_warnings: str = 'ignore'): + self._main_build = main_build + self._worker_build_list = list(worker_builds) + self._capture_warnings = capture_warnings + self._filter_warnings = filter_warnings + + def is_main_proc(self): + return multiprocessing.current_process().name == "MainProcess" + + @classmethod + def client_setup_plugin(cls, client: dask.distributed.Client, + conf_build: LoggerConfigBuilder, + capture_warnings: bool, + filter_warnings: str) -> dask.distributed.Client: + plugin = WorkerInitializer.build(conf_build.config, capture_warnings=capture_warnings, + filter_warnings=filter_warnings) + client.register_plugin(plugin) + return client + + def setup_client(self, client: dask.distributed.Client, forward_name: Optional[str]): + # todo fill-in + if not self.is_main_proc(): + return client + # main logger setup + for wb in self._worker_build_list: + client = self.client_setup_plugin(client, wb, self._capture_warnings, self._filter_warnings) + # forward to the main process logger + client.forward_logging(logger_name=forward_name) + return client + + def curate_filename(self, *, output_dir: Optional[str], fname: Optional[str], + handler_name: Optional[str], out_field: Optional[str]): + # do nothing if any of below is None: + if handler_name is None or fname is None or out_field is None: + return + dest = fname + if output_dir is not None: + os.makedirs(output_dir, exist_ok=True) + dest = os.path.join(output_dir, fname) + assert dest is not None and handler_name is not None and out_field is not None + self._main_build = self._main_build.set_handler_target(handler_name=handler_name, + out_field=out_field, out_value=dest) + return self._main_build + + def setup_main_logger(self, *, output_dir: Optional[str], + fname: Optional[str], + handler_name: Optional[str], + out_field: Optional[str]): + if not self.is_main_proc(): + return + # main logger + self._main_build = self.curate_filename(output_dir=output_dir, fname=fname, + handler_name=handler_name, out_field=out_field) + + logging.config.dictConfig(self._main_build.config) + + def setup(self, *, + client: dask.distributed.Client, + forward_name: Optional[str], + output_dir: Optional[str], + fname: str = DEFAULT_LOG_FN, + handler_name: Optional[str] = HDL_FILE, + out_field: Optional[str] = HDL_OUT_FIELD + ): + if not self.is_main_proc(): + return + self.setup_main_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field) + self.setup_client(client, forward_name) + + +class MultiProcessingLogManager: + """Adapted from Andreas Poehlmann's implementation. + + Under the hood of dask, worker loggers can be forwarded + to client directly for both local and distributed clusters. No need + + """ + def __init__(self, logger_name, *, manager): + """create a MultiProcessingLogManager + + Note: this uses a multiprocessing Queue to correctly transfer + logging information from worker processes to the main + process logging instance + + Parameters + ---------- + logger_name : str + the name of the logger instance + manager : multiprocessing.Manager + the mp Manager instance used for creating sharable context + """ + self._logger_name = logger_name + self._log_queue = manager.Queue() + self._log_thread_active = False + + @property + def is_main_process(self): + return multiprocessing.current_process().name == "MainProcess" + + @property + def logger(self): + """returns the logger instance""" + if self.is_main_process: + return logging.getLogger(self._logger_name) + else: + root = logging.getLogger() + if not root.hasHandlers(): + qh = QueueHandler(self._log_queue) + root.setLevel(logging.INFO) + root.addHandler(qh) + # note: this should be revisited and set by the main process + warnings.filterwarnings('ignore') + logging.captureWarnings(True) + return root + + @contextmanager + def logger_thread(self): + """context manager for multiprocess logging + + Note: this starts the thread responsible for handing the log records + emitted by child processes to the main logger instance + """ + assert self.is_main_process + assert not self._log_thread_active # not reentrant... + self._log_thread_active = True + + def process_queue(q, ln): + main_logger = logging.getLogger(ln) + while True: + log_record = q.get() + if log_record is None: + break + main_logger.handle(log_record) + + lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name)) + lt.start() + try: + yield self + finally: + self._log_queue.put(None) + lt.join() + self._log_thread_active = False diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py index 6c1a4e1..b49f5a9 100644 --- a/histoqc/_pipeline.py +++ b/histoqc/_pipeline.py @@ -9,14 +9,11 @@ import os import platform import shutil -import threading import warnings from contextlib import ExitStack -from contextlib import contextmanager from contextlib import nullcontext from importlib import import_module from logging.config import dictConfig -from logging.handlers import QueueHandler from typing_extensions import Literal from typing import cast @@ -47,7 +44,7 @@ def setup_logging(*, capture_warnings, filter_warnings): 'handlers': { 'console': { 'class': 'logging.StreamHandler', - 'level': 'INFO', + 'level': 'DEBUG', # todo 'formatter': 'default', }, 'logfile': { @@ -102,90 +99,21 @@ def move_logging_file_handler(logger, destination): logger.addHandler(new_handler) -class MultiProcessingLogManager: - - def __init__(self, logger_name, *, manager): - """create a MultiProcessingLogManager - - Note: this uses a multiprocessing Queue to correctly transfer - logging information from worker processes to the main - process logging instance - - Parameters - ---------- - logger_name : str - the name of the logger instance - manager : multiprocessing.Manager - the mp Manager instance used for creating sharable context - """ - self._logger_name = logger_name - self._log_queue = manager.Queue() - self._log_thread_active = False - - @property - def is_main_process(self): - return multiprocessing.current_process().name == "MainProcess" - - @property - def logger(self): - """returns the logger instance""" - if self.is_main_process: - return logging.getLogger(self._logger_name) - else: - root = logging.getLogger() - if not root.hasHandlers(): - qh = QueueHandler(self._log_queue) - root.setLevel(logging.INFO) - root.addHandler(qh) - # note: this should be revisited and set by the main process - warnings.filterwarnings('ignore') - logging.captureWarnings(True) - return root - - @contextmanager - def logger_thread(self): - """context manager for multiprocess logging - - Note: this starts the thread responsible for handing the log records - emitted by child processes to the main logger instance - """ - assert self.is_main_process - assert not self._log_thread_active # not reentrant... - self._log_thread_active = True - - def process_queue(q, ln): - main_logger = logging.getLogger(ln) - while True: - log_record = q.get() - if log_record is None: - break - main_logger.handle(log_record) - - lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name)) - lt.start() - try: - yield - finally: - self._log_queue.put(None) - lt.join() - self._log_thread_active = False - - -def log_pipeline(config, log_manager): +def log_pipeline(config, logger: logging.Logger): """log the pipeline information Parameters ---------- config : configparser.ConfigParser - log_manager : MultiProcessingLogManager + logger : logger obj to log the messages """ - assert log_manager.is_main_process + assert multiprocessing.current_process().name == "MainProcess" steps = config.get(section='pipeline', option='steps').splitlines() - log_manager.logger.info("the pipeline will use these steps:") + logger.info("the pipeline will use these steps:") for process in steps: mod_name, func_name = process.split('.') - log_manager.logger.info(f"\t\t{mod_name}\t{func_name}") + logger.info(f"\t\t{mod_name}\t{func_name}") return steps diff --git a/histoqc/_worker.py b/histoqc/_worker.py index 163a998..d55d5d2 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -1,52 +1,36 @@ """histoqc worker functions""" -import multiprocessing +import logging import os import shutil import traceback from histoqc.BaseImage import BaseImage -from histoqc._pipeline import load_pipeline +from histoqc._pipeline import BatchedResultFile from histoqc._pipeline import setup_plotting_backend -from typing import Dict, List, Optional -from multiprocessing import managers - +from typing import List, Optional KEY_ASSIGN: str = 'device_assign' PARAM_SHARE: str = 'shared_dict' # --- worker functions -------------------------------------------------------- -def id_assign_helper(device_id_list: List[int], assign_dict: managers.DictProxy): - pid = os.getpid() - for device_id in device_id_list: - if device_id not in assign_dict.values(): - assign_dict[pid] = device_id - return - - -def device_assign(device_id_list: List[int], shared_dict: managers.DictProxy): - """Initializer to configure each worker with a specific GPU.""" - shared_dict[KEY_ASSIGN] = shared_dict.get(KEY_ASSIGN, None) - assert shared_dict[KEY_ASSIGN] is not None - assert KEY_ASSIGN in shared_dict - id_assign_helper(device_id_list, shared_dict[KEY_ASSIGN]) - -def worker_setup(c, device_id_list: List[int], state: Dict): +# c: configparser.Parser, cuda: bool, device_id_list: List[int], state: Dict +def worker_setup(): """needed for multiprocessing worker setup""" setup_plotting_backend() - shared_dict = state[PARAM_SHARE] - load_pipeline(config=c) - device_assign(device_id_list, shared_dict) + # shared_dict = state[PARAM_SHARE] + # load_pipeline(config=c) + # device_assign(cuda, device_id_list, shared_dict) def worker(idx, file_name, *, - process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force): + process_queue, config, outdir, lock, shared_dict, num_files, force): """pipeline worker function""" - + logger = logging.getLogger() # --- output directory preparation -------------------------------- fname_outdir = os.path.join(outdir, os.path.basename(file_name)) if os.path.isdir(fname_outdir): # directory exists if not force: - log_manager.logger.warning( + logger.warning( f"{file_name} already seems to be processed (output directory exists)," " skipping. To avoid this behavior use --force" ) @@ -57,11 +41,13 @@ def worker(idx, file_name, *, # create output dir os.makedirs(fname_outdir) - log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") - device_id = shared_dict[KEY_ASSIGN].get(os.getpid(), None) + logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") + # let Dask handle the device visibility/assignment + device_id = 0 # shared_dict[KEY_ASSIGN].get(os.getpid(), None) + # logger.info(f"{__name__} - {file_name}: Device ID: {dict(shared_dict[KEY_ASSIGN])}") if device_id is None: - log_manager.logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id." - f"Default: use 0 for CUDA devices.") + logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id." + f"Default: use 0 for CUDA devices.") s: Optional[BaseImage] = None try: @@ -74,13 +60,15 @@ def worker(idx, file_name, *, s["completed"].append(process.__name__) except Exception as exc: # reproduce histoqc error string + logger.info(f"{file_name}: Error Block") if s is not None: - s.image_handle.release() - print(f"DBG: {__name__}: {exc}") + # s.image_handle.release() + s.image_handle.close() + # print(f"DBG: {__name__}: {exc}") _oneline_doc_str = exc.__doc__.replace('\n', '') if exc.__doc__ is not None else '' err_str = f"{exc.__class__} {_oneline_doc_str} {exc}" trace_string = traceback.format_exc() - log_manager.logger.error( + logger.error( f"{file_name} - Error analyzing file (skipping): \t {err_str}. Traceback: {trace_string}" ) if exc.__traceback__.tb_next is not None: @@ -114,7 +102,7 @@ def worker_success(s, result_file): result_file.write_line("\t".join([_fields, _warnings])) -def worker_error(e, failed): +def worker_error(e, failed: List): """error callback""" if hasattr(e, '__histoqc_err__'): file_name, err_str, tb = e.__histoqc_err__ @@ -124,3 +112,21 @@ def worker_error(e, failed): # around the worker function file_name, err_str, tb = "N/A", f"error outside of pipeline {e!r}", None failed.append((file_name, err_str, tb)) + + +def worker_flow_for_file(idx: int, file_name: str, + failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]: + try: + base_image = worker(idx, file_name, **kwargs) + except Exception as exc: + base_image = None + worker_error(exc, failed) + else: + worker_success(base_image, results) + return base_image + + +def worker_single_process(files, failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]: + for idx, file_name in enumerate(files): + return worker_flow_for_file(idx, file_name, failed, results, **kwargs) + diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py index 52b3a15..bb6d71f 100644 --- a/histoqc/array_adapter/adapter.py +++ b/histoqc/array_adapter/adapter.py @@ -5,7 +5,7 @@ from numbers import Number from typing import Callable, Mapping, Tuple, Optional, Any, Iterable, Dict from typing_extensions import Self, TypeGuard -from histoqc.import_wrapper.cupy_extra import cupy as cp +from histoqc.import_wrapper.cupy_extra import cupy as cp, cupy_installed from enum import Enum import logging import functools @@ -14,14 +14,6 @@ import re -def cupy_installed() -> bool: - try: - import cupy - return True - except ImportError: - return False - - class ArrayDeviceType(Enum): CPU: str = 'cpu' CUDA: str = 'cuda' @@ -134,6 +126,17 @@ class ArrayAdapter(Callable): id: int + @classmethod + def get_device(cls, arr: TYPE_ARRAY): + if not cls.is_array(arr): + logging.warning(f"{__name__} {type(arr)} is not an Array") + return None + if cls.is_numpy(arr): + return Device.build(Device.DEVICE_CPU) + assert cls.is_cupy(arr), f"Not a Cupy array: {type(arr)}" + device_id = arr.device.id + return Device(ArrayDeviceType.CUDA, device_id) + @staticmethod def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]: return isinstance(arr, np.ndarray) @@ -192,11 +195,16 @@ def _move_to_device(cls, device: Optional[Device], copy: bool = False) -> TYPE_ARRAY: # structural match > py3.10 if device is None or not cls.is_array(arr): + logging.debug(f"Not Array") return arr assert device is not None if device.is_cpu(): + logging.debug(f"Move to CPU. InputType: {type(arr)}. Input Device: {cls.get_device(arr)}," + f" shape{arr.shape}") return ArrayAdapter.to_numpy(arr, copy=copy) elif device.is_cuda(): + logging.debug(f"Move to GPU: {device}. InputType: {type(arr)}. Input Device: {cls.get_device(arr)}" + f" shape{arr.shape}") return ArrayAdapter.to_cupy(arr, device, copy=copy) raise ValueError(f"Unsupported device: {device}") @@ -276,12 +284,15 @@ def unified_call(cls, output_device: Optional[Device], data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: # use input_device to override the current device, if not None - data = cls.curate_arrays_device(data, device=input_device, copy=False) + data = cls.curate_arrays_device(data, device=input_device, copy=True) # if data is None --> use input device. # if input_device is None, by default will invoke GPU interface input_type = cls.array_device_type(data) if data is not None else input_device # attempt to fetch the op, revert to CPU if GPU impl is not available (func=GPU impl, func_device=cuda func, func_device = cls.get_api(cpu_func, func_map, input_type) + logging.debug(f"{__name__}: Call Adapter for {cpu_func} with " + f"In Device: {input_device}, Out Device: {output_device}." + f"Mapped to: {func} and actual input device: {func_device}") func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True) curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True) diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py index 33d6586..02eb76e 100644 --- a/histoqc/import_wrapper/cupy_extra.py +++ b/histoqc/import_wrapper/cupy_extra.py @@ -1,8 +1,17 @@ +from __future__ import annotations + try: import cupy # cupy.cuda.set_allocator(None) - cupy.cuda.set_allocator(cupy.cuda.MemoryPool().malloc) except ImportError: cupy = None finally: cp = cupy + + +def cupy_installed() -> bool: + try: + import cupy + return True + except ImportError: + return False diff --git a/histoqc/import_wrapper/dask_cuda.py b/histoqc/import_wrapper/dask_cuda.py new file mode 100644 index 0000000..8b98acf --- /dev/null +++ b/histoqc/import_wrapper/dask_cuda.py @@ -0,0 +1,12 @@ +try: + import dask_cuda +except ImportError: + dask_cuda = None + + +def dask_cuda_installed() -> bool: + try: + import dask_cuda + return True + except ImportError: + return False diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py index 56bf82e..1811cbb 100644 --- a/histoqc/wsi_handles/base.py +++ b/histoqc/wsi_handles/base.py @@ -144,7 +144,7 @@ def comment(self) -> str: ... @abstractmethod - def get_thumbnail(self, new_dim) -> Union[ARRAY, Backend]: + def get_thumbnail(self, new_dim) -> PILImage: ... @abstractmethod diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py index d9c6a70..8fd081d 100644 --- a/histoqc/wsi_handles/cuimage_handle.py +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -1,6 +1,8 @@ from __future__ import annotations -import skimage.util +import sys + +# import skimage.util from PIL.Image import Image as PILImage from cucim.clara import CuImage from .base import WSIImageHandle @@ -16,6 +18,7 @@ from histoqc.array_adapter import ArrayDeviceType, Device from types import MappingProxyType import logging +import os DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA) @@ -28,7 +31,7 @@ class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): _associated_images: Optional[Mapping] # TODO: standalone parser of vendor information - dummy_handle: Optional[openslide.OpenSlide] + dummy_handle_spill: Optional[openslide.OpenSlide] def backend_rgba2rgb(self, img: CuImage) -> CuImage: # todo: it appears that CuImage does not take care of the alpha channel at all. @@ -46,7 +49,7 @@ def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device: with cp.cuda.Device(device.device_id): w, h, *_ = new_size_wh arr = cp.array(data) - return c_skimage.transform.resize(arr, output_shape=(h, w)) + return c_skimage.transform.resize(arr, output_shape=(h, w), order=3, anti_aliasing=True) def __init__(self, fname: str, device_id: Optional[int]): super().__init__(fname, device_id) @@ -54,28 +57,29 @@ def __init__(self, fname: str, device_id: Optional[int]): self.handle = CuImage(fname) # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a # todo - native unified metadata interface for different vendors. - self.dummy_handle = openslide.OpenSlide(fname) - logging.debug(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}") + # todo - workaround as memory spilling option + self.dummy_handle_spill = openslide.OpenSlide(fname) + logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}") @LazyProperty def background_color(self): - return f"#{self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}" + return f"#{self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}" @LazyProperty def bounding_box(self): dim_width, dim_height = self.dimensions - x = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0)) - y = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0)) - width = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width)) - height = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height)) + x = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0)) + y = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0)) + width = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width)) + height = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height)) return x, y, width, height @LazyProperty def has_bounding_box(self): - return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties - and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties - and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle.properties - and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle.properties + return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle_spill.properties + and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle_spill.properties + and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle_spill.properties + and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle_spill.properties ) @LazyProperty @@ -84,8 +88,8 @@ def dimensions(self): @LazyProperty def magnification(self): - return self.dummy_handle.properties.get("openslide.objective-power") or \ - self.dummy_handle.properties.get("aperio.AppMag") + return self.dummy_handle_spill.properties.get("openslide.objective-power") or \ + self.dummy_handle_spill.properties.get("aperio.AppMag") @property def level_count(self): @@ -101,21 +105,21 @@ def level_downsamples(self): @property def vendor(self): - return self.dummy_handle.properties.get("openslide.vendor", "NA") + return self.dummy_handle_spill.properties.get("openslide.vendor", "NA") @property def mpp_x(self): - return self.dummy_handle.properties.get("openslide.mpp-x", "NA") + return self.dummy_handle_spill.properties.get("openslide.mpp-x", "NA") @property def mpp_y(self): - return self.dummy_handle.properties.get("openslide.mpp-y", "NA") + return self.dummy_handle_spill.properties.get("openslide.mpp-y", "NA") @property def comment(self): - return self.dummy_handle.properties.get("openslide.comment", "NA") + return self.dummy_handle_spill.properties.get("openslide.comment", "NA") - def get_thumbnail(self, new_dim): + def get_thumbnail(self, new_dim) -> cp.ndarray: """Get thumbnail Args: @@ -125,23 +129,36 @@ def get_thumbnail(self, new_dim): """ # from openslide - with cp.cuda.Device(self.device.device_id): + with (cp.cuda.Device(self.device.device_id)): downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim))) level = self.get_best_level_for_downsample(downsample) target_w, target_h = (x // int(downsample) for x in self.dimensions) - aspect_ratio = self.dimensions[0] / self.dimensions[1] + # resize thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level])) try: - thumb_cp = cp.array(thumb, copy=True) - target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) - resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), clip=True) - return c_skimage.util.img_as_ubyte(resized) + thumb_cp: cp.ndarray = cp.array(thumb, copy=False) + # todo: for reproducibility -> openslide uses LANCZOS filter in PILLOW + # but the exact detail of LANCZOS resampling (e.g., kernel size) is not specified in documentation. + # need to implement our own LANCZOS resampling for cupy later. + # aspect_ratio = self.dimensions[0] / self.dimensions[1] + # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) + # resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), order=1, + # anti_aliasing=False) + # return c_skimage.util.img_as_ubyte(resized) + resized_pil = Image.fromarray(thumb_cp.get() + ).convert("RGB").resize((target_w, target_h), + resample=Image.Resampling.LANCZOS) except Exception: - logging.warning(f"{__name__} - {self.fname}: OOM on {self.device.device_id}. {traceback.format_exc()}") - thumb_np = np.array(thumb, copy=True) - thumb_np = skimage.util.img_as_ubyte(thumb_np) - return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB") + # self.reload() + logging.error(f"{__name__} - {self.fname}: OOM on {self.device.device_id}." + f"Use CPU" + f"Error Message Dumped: {traceback.format_exc()}") + # thumb_np = np.array(thumb, copy=True) + # thumb_np = skimage.util.img_as_ubyte(thumb_np) + # return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB") + resized_pil = self.dummy_handle_spill.get_thumbnail(new_dim).convert("RGB") + return cp.array(resized_pil, copy=False) def get_best_level_for_downsample(self, down_factor: float) -> int: """Return the largest level that's smaller than the target downsample factor, consistent with openslide. @@ -161,13 +178,16 @@ def get_best_level_for_downsample(self, down_factor: float) -> int: def region_backend(self, location, level, size, **kwargs) -> CuImage: with cp.cuda.Device(self.device.device_id): - return self.handle.read_region(location=location, level=level, size=size, **kwargs) + return self.handle.read_region(location=location, level=level, size=size, + num_workers=max(1, os.cpu_count() // 2), + **kwargs) @staticmethod def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray: device = CuImageHandle.validate_device(device) with cp.cuda.Device(device.device_id): result = cp.array(region, copy=False) + logging.debug(f"{__name__} - {device.device_id} == {result.device}") return result @@ -223,16 +243,21 @@ def release(self): cp.get_default_pinned_memory_pool().free_all_blocks() def close_handle(self): + logging.debug(f"{__name__}: {self.fname} - closed") if hasattr(self, "handle") and self.handle is not None: self.handle.close() del self.handle self.handle = None - if self.dummy_handle is not None: - self.dummy_handle.close() - self.dummy_handle = None + if self.dummy_handle_spill is not None: + self.dummy_handle_spill.close() + self.dummy_handle_spill = None # clear the cached map self.clear_associated_images() + # self.release() + + def reload(self): self.release() + self.handle = CuImage(self.fname) @property def device_type(self) -> ArrayDeviceType: diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py index 1c28c33..3225583 100644 --- a/histoqc/wsi_handles/openslide_handle.py +++ b/histoqc/wsi_handles/openslide_handle.py @@ -41,6 +41,7 @@ def __init__(self, fname: str, device_id: Optional[int] = None): # get background color self._background_color = f"#{self.handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}" + logging.info(f"{__name__}: {fname}: Create OpenSlideHandle at {device_id}. {self.device}") def __get_bounding_box(self) -> Tuple[int, int, int, int]: (dim_width, dim_height) = self.handle.dimensions @@ -127,7 +128,7 @@ def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage: @staticmethod def backend_to_array(region: PILImage, device: Optional[Device]) -> np.ndarray: OpenSlideHandle.sanitize_device(device) - return np.array(region) + return np.array(region, copy=False) @staticmethod def array_to_numpy(arr: np.ndarray) -> np.ndarray: diff --git a/requirements.txt b/requirements.txt index 30754ef..0bc584d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ requests~=2.28.1 Pillow~=9.4.0 setuptools~=65.6.3 shapely~=2.0.1 +dask~=2024.1.1 diff --git a/setup.py b/setup.py index e6a092b..49a6691 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup_requires=['setuptools_scm'], extras_require={ "dicom": ["wsidicom"], - "cucim": ["cucim", "cupy"], + "cucim": ["cucim", "cupy", "dask-cuda"], }, package_data={ 'histoqc.config': ['*.ini'], From 761e3d3af8d3a14bbe2d6a6c187f6d9ab7288e53 Mon Sep 17 00:00:00 2001 From: CielAl Date: Fri, 10 May 2024 02:24:47 -0400 Subject: [PATCH 6/6] misc fix - matplotlib backend specified in worker init --- histoqc/BaseImage.py | 6 +- histoqc/ClassificationModule.py | 136 +++++++++++++--------- histoqc/__main__.py | 81 ++++++++----- histoqc/_pipeline.py | 20 ---- histoqc/_worker.py | 2 +- histoqc/{_logging.py => _worker_setup.py} | 44 +++++-- histoqc/array_adapter/adapter.py | 35 ++++-- histoqc/wsi_handles/base.py | 16 ++- histoqc/wsi_handles/cuimage_handle.py | 75 +++++++----- histoqc/wsi_handles/openslide_handle.py | 4 +- rmm_log.txt | 2 + 11 files changed, 256 insertions(+), 165 deletions(-) rename histoqc/{_logging.py => _worker_setup.py} (86%) create mode 100644 rmm_log.txt diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index 97fe60c..0a8ab55 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -53,7 +53,7 @@ def image_handle(self) -> Optional[WSIImageHandle]: def image_handle(self, image_handle: WSIImageHandle): self._image_handle = image_handle - def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None): + def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None, num_threads: Optional[int] = 1): dict.__init__(self) # init self._device_id = device_id @@ -62,7 +62,9 @@ def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None) # dynamically load wsi image handle try: - self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles, device_id=device_id) + self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, + handles, device_id=device_id, + num_threads=num_threads) except Exception: trace_string = traceback.format_exc() logging.error(f"{__name__}: {fname} -- Error Creating Handle - Traceback: {trace_string}") diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 5096bc9..ef72dae 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -4,7 +4,7 @@ import sys from histoqc.array_adapter import ArrayAdapter, Device from ast import literal_eval as make_tuple -import traceback +from dask.distributed import Lock from distutils.util import strtobool from histoqc.BaseImage import printMaskHelper, BaseImage @@ -188,6 +188,86 @@ def compute_features(img, params): return np.concatenate(feats, axis=2) +def new_clf_model(s: BaseImage, params): + name = params.get("name", "classTask") + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}") + adapter = s.image_handle.adapter + model_vals = [] + model_labels = adapter.sync(np.empty([0, 1])) + nsamples_per_example = float(params.get("nsamples_per_example", -1)) + + for ex in params["examples"].splitlines(): + ex = re.split(r'(? 1 else int(mask.shape[0] + * nsamples_per_example) + idxkeep = np.random.choice(mask.shape[0], size=int(nitems)) + eximg = eximg[idxkeep, :] + mask = mask[idxkeep] + + model_vals.append(eximg) + # again any component in vstack's input is cupy.ndarray will result in a cupy.ndarray + # but we explicitly sync the device of mask and model_labels anyway + model_labels = np.vstack((model_labels, mask)) + + model_vals = np.vstack(model_vals) + clf = RandomForestClassifier(n_jobs=-1) + # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}") + model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels, + device=Device.build(Device.DEVICE_CPU), copy=True) + adapter(clf.fit)(model_vals, y=model_labels.ravel()) + # clf.fit(model_vals, y=model_labels.ravel() + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done") + return clf + + +def _cache_clf(s: BaseImage, params, var_name): # f"model_{name}" + # name = params.get("name", "classTask") + # model_var = Variable(var_name) + + with Lock(var_name): # this lock is shared across all threads such that only one thread needs to train the model + # then it is shared with all other modules + if not params["shared_dict"].get(var_name, False): + clf = new_clf_model(s, params) + logging.info(f"{s['filename']} - saving to cache...") + params["shared_dict"][var_name] = clf + logging.info(f"{s['filename']} - cached...") + + logging.info(f"{s['filename']} - Lock Released...") + # try: + # clf = model_var.get() + # except ValueError: + # clf = new_clf_model(s, params) + # model_var.set(clf) + + +def get_clf(s: BaseImage, params): + name = params.get("name", "classTask") + var_name = f"model_{name}" + _cache_clf(s, params, var_name) + # clf = Variable(var_name).get() # params["shared_dict"]["model_" + name] + clf = params["shared_dict"]["model_" + name] + return clf + + def byExampleWithFeatures(s: BaseImage, params): name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}") @@ -207,59 +287,9 @@ def byExampleWithFeatures(s: BaseImage, params): adapter = s.image_handle.adapter params['adapter'] = adapter params['filename'] = s['filename'] - with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model - # then it is shared with all other modules - if not params["shared_dict"].get("model_" + name, False): - - logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}") - - model_vals = [] - model_labels = adapter.sync(np.empty([0, 1])) - - for ex in params["examples"].splitlines(): - ex = re.split(r'(? 1 else int(mask.shape[0] - * nsamples_per_example) - idxkeep = np.random.choice(mask.shape[0], size=int(nitems)) - eximg = eximg[idxkeep, :] - mask = mask[idxkeep] - - model_vals.append(eximg) - # again any component in vstack's input is cupy.ndarray will result in a cupy.ndarray - # but we explicitly sync the device of mask and model_labels anyway - model_labels = np.vstack((model_labels, mask)) - - # do stuff here with model_vals - model_vals = np.vstack(model_vals) - clf = RandomForestClassifier(n_jobs=-1) - # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}") - model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels, - device=Device.build(Device.DEVICE_CPU), copy=True) - adapter(clf.fit)(model_vals, y=model_labels.ravel()) - # clf.fit(model_vals, y=model_labels.ravel()) - params["shared_dict"]["model_" + name] = clf - logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done") - clf = params["shared_dict"]["model_" + name] + clf = get_clf(s, params) + logging.info(f"{__name__} - {s['filename']} Infer with the trained model.") img = s.getImgThumb(s["image_work_size"]) feats = compute_features(img, params) logging.debug(f"{__name__} - {s['filename']} - NaN check img: {np.isnan(img).any()}") diff --git a/histoqc/__main__.py b/histoqc/__main__.py index b8ec752..a537948 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -11,16 +11,16 @@ from typing import Tuple, Optional, List import dask.distributed - +from logging.config import dictConfig from histoqc._pipeline import BatchedResultFile # from histoqc._pipeline import MultiProcessingLogManager from copy import deepcopy -from histoqc._logging import LoggingSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, HDL_OUT_FIELD +from histoqc._worker_setup import WorkerSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, \ + HDL_OUT_FIELD, setup_plotting_backend, LoggerConfigBuilder, FMT_DFT, HDL_CONSOLE from histoqc._pipeline import load_pipeline from histoqc._pipeline import log_pipeline -from histoqc._pipeline import move_logging_file_handler +# from histoqc._pipeline import move_logging_file_handler # from histoqc._pipeline import setup_logging -from histoqc._pipeline import setup_plotting_backend from histoqc._worker import (worker, worker_setup, worker_success, worker_error, worker_single_process, PARAM_SHARE, KEY_ASSIGN) @@ -75,12 +75,19 @@ def parse_multiprocessing(args: argparse.Namespace, return args, is_cuda -def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]]): +def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]], + nvlink: bool, spill_limit, rmm_pool, num_threads: int): assert nprocesses > 0, f"Expect number of processes > 0 to launch the cluster. Get {nprocesses}" if not is_cuda: - return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, ) + return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, threads_per_worker=num_threads) assert cp is not None and dask_cuda is not None - return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, n_workers=nprocesses) + return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, + jit_unspill=True, + threads_per_worker=num_threads, + device_memory_limit=spill_limit, + n_workers=nprocesses, + enable_nvlink=nvlink, + rmm_pool_size=rmm_pool, ) # rmm_maximum_pool_size="10GB" @managed_pkg_data @@ -127,6 +134,14 @@ def main(argv=None): nargs='+', help="GPU Devices to use (None=use all). Default: None", default=None) + parser.add_argument('--nvlink', action='store_true', + help='whether to enable NVLINK. Only applied when CUDA cluster is launched') + parser.add_argument('--rmm_pool', type=str, default="10GB", + help='Pool size. Only applied when CUDA cluster is launched') + parser.add_argument('--spill_limit', type=float, default=0.5, + help='Percentage threshold of GPU memory before spill to host memory') + parser.add_argument('--num_threads', type=int, default=4, + help='# of threads per worker.') args = parser.parse_args(argv) # --- multiprocessing and logging setup ----------------------------------- @@ -140,15 +155,18 @@ def main(argv=None): os.makedirs(args.outdir, exist_ok=True) # move_logging_file_handler(logging.getLogger(), args.outdir) - logging_setup = LoggingSetup(deepcopy(MAIN_CONF_BUILD), - deepcopy(WORKER_CONF_BUILD), - capture_warnings=True, filter_warnings='ignore') + logging_setup = WorkerSetup(deepcopy(MAIN_CONF_BUILD), + deepcopy(WORKER_CONF_BUILD), + capture_warnings=True, filter_warnings='ignore') # setup main proc logger config and file handler. - logging_setup.setup_main_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN, - handler_name=HDL_FILE, out_field=HDL_OUT_FIELD) + logging_setup.setup_mainprocess_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN, + handler_name=HDL_FILE, out_field=HDL_OUT_FIELD) # Inherit from the root logger. + main_logger = logging.getLogger(__name__) + main_logger.addHandler(logging.StreamHandler(sys.stdout)) + if conf_warn_msg: main_logger.warning(conf_warn_msg) @@ -220,7 +238,7 @@ def main(argv=None): 'process_queue': process_queue, 'config': config, 'outdir': args.outdir, - 'lock': mpm.Lock(), # todo transit to Dask's Lock + 'lock': dask.distributed.Lock('x'), # mpm.Lock(), # todo transit to Dask's Lock PARAM_SHARE: mpm.dict(), # todo transit to Dask's Variable 'num_files': num_files, 'force': args.force, @@ -231,29 +249,30 @@ def main(argv=None): setup_plotting_backend(main_logger) try: - if args.nprocesses > 0: local_cluster = new_cluster('histoqc', is_cuda=is_cuda, - nprocesses=args.nprocesses, gpu_ids=args.gpu_ids) - with Client(local_cluster) as client: - # register the worker side - logging_setup.setup_client(client, forward_name="root") - - # noinspection PyTypeChecker - futures_list = [client.submit(worker, idx, file_name, **_shared_state) - for idx, file_name in enumerate(files)] - - for future in as_completed(futures_list): - try: - base_img_finished = future.result() - except Exception as exc: - worker_error(exc, failed) - else: - worker_success(base_img_finished, result_file=results) + nprocesses=args.nprocesses, gpu_ids=args.gpu_ids, + nvlink=args.nvlink, spill_limit=args.spill_limit, + rmm_pool=args.rmm_pool, num_threads=args.num_threads) + with local_cluster: + with Client(local_cluster) as client: + # register the worker side + logging_setup.setup_client(client, forward_name="root") + + # noinspection PyTypeChecker + futures_list = [client.submit(worker, idx, file_name, **_shared_state) + for idx, file_name in enumerate(files)] + + for future in as_completed(futures_list): + try: + base_img_finished = future.result() + except Exception as exc: + worker_error(exc, failed) + else: + worker_success(base_img_finished, result_file=results) else: worker_setup() worker_single_process(files, failed, results, **_shared_state) - except KeyboardInterrupt: main_logger.info("-----REQUESTED-ABORT-----\n") diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py index b49f5a9..21c792f 100644 --- a/histoqc/_pipeline.py +++ b/histoqc/_pipeline.py @@ -7,7 +7,6 @@ import logging import multiprocessing import os -import platform import shutil import warnings from contextlib import ExitStack @@ -117,25 +116,6 @@ def log_pipeline(config, logger: logging.Logger): return steps -# --- worker process helpers ------------------------------------------ - -def setup_plotting_backend(logger=None): - """loads the correct matplotlib backend - - Parameters - ---------- - logger : - the logging.Logger instance - """ - import matplotlib - if platform.system() != "Windows" and not os.environ.get('DISPLAY'): - if logger is not None: - logger.info('no display found. Using non-interactive Agg backend') - matplotlib.use('Agg') - else: - matplotlib.use('TkAgg') - - class BatchedResultFile: """BatchedResultFile encapsulates the results writing diff --git a/histoqc/_worker.py b/histoqc/_worker.py index d55d5d2..8c29d54 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -5,7 +5,7 @@ import traceback from histoqc.BaseImage import BaseImage from histoqc._pipeline import BatchedResultFile -from histoqc._pipeline import setup_plotting_backend +from histoqc._worker_setup import setup_plotting_backend from typing import List, Optional KEY_ASSIGN: str = 'device_assign' PARAM_SHARE: str = 'shared_dict' diff --git a/histoqc/_logging.py b/histoqc/_worker_setup.py similarity index 86% rename from histoqc/_logging.py rename to histoqc/_worker_setup.py index 7bf7a8f..0de9472 100644 --- a/histoqc/_logging.py +++ b/histoqc/_worker_setup.py @@ -1,5 +1,7 @@ from __future__ import annotations import multiprocessing +import platform + import dask import threading import os @@ -19,14 +21,13 @@ HDL_FILE = 'file' HDL_OUT_FIELD = 'filename' MAIN_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT) - .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT) .add_handler_by_type(handler_type=HDL_FILE, level='WARNING', formatter=FMT_DFT) - .add_root(level="INFO", handlers=[HDL_CONSOLE, HDL_FILE]) + .add_root(level="INFO", handlers=[HDL_FILE]) .set_handler_target(handler_name=HDL_FILE, out_field=HDL_OUT_FIELD, out_value=DEFAULT_LOG_FN)) WORKER_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT) - .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT) - .add_root(level="INFO", handlers=[HDL_CONSOLE])) + .add_handler_by_type(handler_type=HDL_CONSOLE, level='INFO', formatter=FMT_DFT) + .add_root(level="DEBUG", handlers=[HDL_CONSOLE])) # HDL_CONSOLE def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore'): @@ -36,6 +37,25 @@ def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore logging.captureWarnings(capture_warnings) +def setup_plotting_backend(logger=None, distributed: bool = False): + """loads the correct matplotlib backend + + Parameters + ---------- + logger : + the logging.Logger instance + distributed: + whether a distributed framework is applied, which forces to enable Agg + """ + import matplotlib + if distributed or (platform.system() != "Windows" and not os.environ.get('DISPLAY')): + if logger is not None: + logger.info('no display found. Using non-interactive Agg backend') + matplotlib.use('Agg') + else: + matplotlib.use('TkAgg') + + class DaskLogHandler(logging.Handler): """Custom Handler, which emits the topic/message from builtin logging.Logger to dask's centralized logging. @@ -68,6 +88,7 @@ def __init__(self, worker_config: Dict, capture_warnings: bool = True, filter_wa def setup(self, worker): logging.config.dictConfig(self.worker_config) handle_warning(capture_warnings=self.capture_warnings, filter_warnings=self.filter_warnings) + setup_plotting_backend(logging.getLogger(), distributed=True) @classmethod def build(cls, worker_config: Dict, @@ -75,7 +96,7 @@ def build(cls, worker_config: Dict, return cls(worker_config, capture_warnings, filter_warnings) -class LoggingSetup: +class WorkerSetup: _plugin: WorkerPlugin _main_build: LoggerConfigBuilder _worker_build_list = List[LoggerConfigBuilder] @@ -126,10 +147,10 @@ def curate_filename(self, *, output_dir: Optional[str], fname: Optional[str], out_field=out_field, out_value=dest) return self._main_build - def setup_main_logger(self, *, output_dir: Optional[str], - fname: Optional[str], - handler_name: Optional[str], - out_field: Optional[str]): + def setup_mainprocess_logger(self, *, output_dir: Optional[str], + fname: Optional[str], + handler_name: Optional[str], + out_field: Optional[str]): if not self.is_main_proc(): return # main logger @@ -148,7 +169,7 @@ def setup(self, *, ): if not self.is_main_proc(): return - self.setup_main_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field) + self.setup_mainprocess_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field) self.setup_client(client, forward_name) @@ -224,3 +245,6 @@ def process_queue(q, ln): self._log_queue.put(None) lt.join() self._log_thread_active = False + +# --- worker process helpers ------------------------------------------ + diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py index bb6d71f..cd9767d 100644 --- a/histoqc/array_adapter/adapter.py +++ b/histoqc/array_adapter/adapter.py @@ -12,6 +12,13 @@ from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv import skimage import re +import asyncio + + +async def async_write_image(filename, arr: np.ndarray, **kwargs): + loop = asyncio.get_running_loop() + # noinspection PyArgumentList + await loop.run_in_executor(None, skimage.io.imsave, filename, arr, **kwargs) class ArrayDeviceType(Enum): @@ -142,7 +149,7 @@ def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]: return isinstance(arr, np.ndarray) @staticmethod - def is_cupy(arr: TYPE_CP) -> TypeGuard[TYPE_CP]: + def is_cupy(arr) -> TypeGuard[TYPE_CP]: return cupy_installed() and isinstance(arr, cp.ndarray) @staticmethod @@ -150,6 +157,7 @@ def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP: if ArrayAdapter.is_numpy(arr) or isinstance(arr, Number): return np.array(arr, copy=copy) assert ArrayAdapter.is_cupy(arr) + # logging.debug(f"{__name__}: CUPY->NUMPY detected. Expect latency.") return arr.get() @staticmethod @@ -157,6 +165,10 @@ def to_cupy(arr: TYPE_ARRAY | Number, device: Device, copy: bool = False) -> TYP assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \ f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}" assert device is not None and isinstance(device, Device) and device.is_cuda(), f"{device} is not CUDA device" + if ArrayAdapter.is_cupy(arr) and arr.device.id == device.device_id and not copy: + # logging.warning(f"{__name__}: Same Device. Return self:" + # f" {device.device_id}. {cp.cuda.runtime.deviceGetPCIBusId(arr.device.id)}") + return arr with cp.cuda.Device(device.device_id): return cp.array(arr, copy=copy) @@ -252,7 +264,7 @@ def get_api(cpu_func: Callable, func_map: Mapping[Callable, Callable], device: Optional[Device]) -> Tuple[Callable, Device]: if device is None: - logging.warning(f"Device unspecified in both input data and input device. Try: gpu:0") + logging.debug(f"Device unspecified in both input data and input device. Try: gpu:0") device = Device.build(Device.DEVICE_CUDA) if device.is_cpu(): return cpu_func, device @@ -284,7 +296,7 @@ def unified_call(cls, output_device: Optional[Device], data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: # use input_device to override the current device, if not None - data = cls.curate_arrays_device(data, device=input_device, copy=True) + data = cls.curate_arrays_device(data, device=input_device, copy=False) # if data is None --> use input device. # if input_device is None, by default will invoke GPU interface input_type = cls.array_device_type(data) if data is not None else input_device @@ -293,18 +305,18 @@ def unified_call(cls, logging.debug(f"{__name__}: Call Adapter for {cpu_func} with " f"In Device: {input_device}, Out Device: {output_device}." f"Mapped to: {func} and actual input device: {func_device}") - func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True) + func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=False) - curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True) + curated_args = cls.curate_arrays_device(*args, device=func_device, copy=False) curated_kwargs = dict() for k, v in kwargs.items(): - curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=True) + curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=False) output = cls.call_helper(func_in, func_device, func, *curated_args, **curated_kwargs) # only move the output around if the output is an array if isinstance(output, tuple): - return cls.curate_arrays_device(*output, device=output_device, copy=True) - return cls.curate_arrays_device(output, device=output_device, copy=True) + return cls.curate_arrays_device(*output, device=output_device, copy=False) + return cls.curate_arrays_device(output, device=output_device, copy=False) @classmethod def _validate_device(cls, device: Optional[Device | str | int]) -> Optional[Device]: @@ -454,8 +466,11 @@ def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY: op=xor) @classmethod - def imsave(cls, filename: str, arr: TYPE_ARRAY, **kwargs): + def imsave(cls, filename: str, arr: TYPE_ARRAY, asynch: bool = True, **kwargs): logging.debug(f"{__name__}: SHAPE DBG {arr.shape}") arr = cls.curate_arrays_device(arr, device=Device.build(Device.DEVICE_CPU), copy=True) logging.debug(f"{__name__}: TYPE DBG {type(arr)}") - return skimage.io.imsave(filename, arr, **kwargs) + if not asynch: + skimage.io.imsave(filename, arr, **kwargs) + return + asyncio.run(async_write_image(filename, arr, **kwargs)) diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py index 1811cbb..4a0e6ab 100644 --- a/histoqc/wsi_handles/base.py +++ b/histoqc/wsi_handles/base.py @@ -23,6 +23,7 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]): fname: str _adapter: ArrayAdapter _device: Device + _num_threads: int @property def device(self) -> Device: @@ -317,14 +318,15 @@ def parse_wsi_handles(handle_list: str | List[str], delimiter: str, @classmethod def __create_handle(cls, fname: str, - handle_class_list: List[Callable[[str, Optional[int]], "WSIImageHandle"]], - device_id: Optional[int]) -> "WSIImageHandle": + handle_class_list: List[Callable[[str, Optional[int], Optional[int]], "WSIImageHandle"]], + device_id: Optional[int], + num_threads: Optional[int]) -> "WSIImageHandle": image_handle = None assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file" for handle_class in handle_class_list: # noinspection PyBroadException try: - image_handle = handle_class(fname, device_id) + image_handle = handle_class(fname, device_id, num_threads) break except Exception: # current wsi handle class doesn't support this file @@ -340,18 +342,20 @@ def __create_handle(cls, fname: str, @classmethod @final - def build_handle(cls, fname: str, handles: str, device_id: Optional[int]) -> "WSIImageHandle": + def build_handle(cls, fname: str, handles: str, device_id: Optional[int], + num_threads: Optional[int]) -> "WSIImageHandle": # get handles list module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES) handle_class_list = dynamic_import(module_list, attr_list, return_first=False) - image_handle = cls.__create_handle(fname, handle_class_list, device_id) + image_handle = cls.__create_handle(fname, handle_class_list, device_id, num_threads) return image_handle - def __init__(self, fname: str, device_id: Optional[int]): + def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int]): self.fname = fname self._device = Device(self.device_type, device_id) self._adapter = ArrayAdapter.build(input_device=self._device, output_device=self._device, contingent_device=self._device) + self._num_threads = num_threads if num_threads is not None else 1 @abstractmethod def close_handle(self): diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py index 8fd081d..f331586 100644 --- a/histoqc/wsi_handles/cuimage_handle.py +++ b/histoqc/wsi_handles/cuimage_handle.py @@ -1,8 +1,6 @@ from __future__ import annotations -import sys - -# import skimage.util +import skimage.util from PIL.Image import Image as PILImage from cucim.clara import CuImage from .base import WSIImageHandle @@ -18,7 +16,7 @@ from histoqc.array_adapter import ArrayDeviceType, Device from types import MappingProxyType import logging -import os +# import os DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA) @@ -26,6 +24,9 @@ class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]): + # todo: implement GPU-accelerated LANCZOS filter + USE_LANCZOS: bool = False + handle: Optional[CuImage] fname: str _associated_images: Optional[Mapping] @@ -51,15 +52,16 @@ def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device: arr = cp.array(data) return c_skimage.transform.resize(arr, output_shape=(h, w), order=3, anti_aliasing=True) - def __init__(self, fname: str, device_id: Optional[int]): - super().__init__(fname, device_id) + def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int] = 2): + super().__init__(fname, device_id, num_threads) self._associated_images = None self.handle = CuImage(fname) # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a # todo - native unified metadata interface for different vendors. # todo - workaround as memory spilling option self.dummy_handle_spill = openslide.OpenSlide(fname) - logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}") + logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}." + f"Corresponding CUDA device PID: {cp.cuda.runtime.deviceGetPCIBusId(device_id)}") @LazyProperty def background_color(self): @@ -119,6 +121,19 @@ def mpp_y(self): def comment(self): return self.dummy_handle_spill.properties.get("openslide.comment", "NA") + @staticmethod + def _resize_osh(thumb_cp: cp.ndarray, width: int, height: int) -> cp.ndarray: + resized_pil = Image.fromarray(thumb_cp.get() + ).convert("RGB").resize((width, height), + resample=Image.Resampling.LANCZOS) + return c_skimage.util.img_as_ubyte(cp.array(resized_pil, copy=False), force_copy=False) + + @staticmethod + def _resize_skimage(thumb_cp: cp.ndarray, width: int, height: int): + + resized = c_skimage.transform.resize(thumb_cp, output_shape=(height, width), order=0) + return c_skimage.util.img_as_ubyte(resized) + def get_thumbnail(self, new_dim) -> cp.ndarray: """Get thumbnail @@ -134,31 +149,27 @@ def get_thumbnail(self, new_dim) -> cp.ndarray: level = self.get_best_level_for_downsample(downsample) target_w, target_h = (x // int(downsample) for x in self.dimensions) + # aspect_ratio = self.dimensions[0] / self.dimensions[1] + # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) + # resize - thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level])) + # thumb = self.backend_rgba2rgb(self.region_backend(location=None, level=level)) + # + # try: + thumb = self.region_backend(level=level) + thumb_cp: cp.ndarray = cp.array(thumb, copy=False) try: - thumb_cp: cp.ndarray = cp.array(thumb, copy=False) - # todo: for reproducibility -> openslide uses LANCZOS filter in PILLOW - # but the exact detail of LANCZOS resampling (e.g., kernel size) is not specified in documentation. - # need to implement our own LANCZOS resampling for cupy later. - # aspect_ratio = self.dimensions[0] / self.dimensions[1] - # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio) - # resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), order=1, - # anti_aliasing=False) - # return c_skimage.util.img_as_ubyte(resized) - resized_pil = Image.fromarray(thumb_cp.get() - ).convert("RGB").resize((target_w, target_h), - resample=Image.Resampling.LANCZOS) + if CuImageHandle.USE_LANCZOS: + return CuImageHandle._resize_osh(thumb_cp, target_w, target_h) + else: + return CuImageHandle._resize_skimage(thumb_cp, target_w, target_h) except Exception: # self.reload() logging.error(f"{__name__} - {self.fname}: OOM on {self.device.device_id}." - f"Use CPU" - f"Error Message Dumped: {traceback.format_exc()}") - # thumb_np = np.array(thumb, copy=True) - # thumb_np = skimage.util.img_as_ubyte(thumb_np) - # return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB") + f"Use CPU" + f"Error Message Dumped: {traceback.format_exc()}. Try CPU method...") resized_pil = self.dummy_handle_spill.get_thumbnail(new_dim).convert("RGB") - return cp.array(resized_pil, copy=False) + return cp.array(resized_pil, copy=False) def get_best_level_for_downsample(self, down_factor: float) -> int: """Return the largest level that's smaller than the target downsample factor, consistent with openslide. @@ -176,11 +187,15 @@ def get_best_level_for_downsample(self, down_factor: float) -> int: # find the indices of the down_indices that points to the best downsample factor value return cast(int, down_indices[down_values.argmax()]) - def region_backend(self, location, level, size, **kwargs) -> CuImage: + def region_backend(self, location=None, level=None, size=None, **kwargs) -> CuImage: + assert level is not None with cp.cuda.Device(self.device.device_id): - return self.handle.read_region(location=location, level=level, size=size, - num_workers=max(1, os.cpu_count() // 2), - **kwargs) + if location is not None and size is not None: + return self.handle.read_region(location=location, level=level, size=size, + num_workers=self._num_threads, + **kwargs) + assert location is None and size is None + return self.handle.read_region(level=level, num_workers=self._num_threads, **kwargs) @staticmethod def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray: diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py index 3225583..435d53e 100644 --- a/histoqc/wsi_handles/openslide_handle.py +++ b/histoqc/wsi_handles/openslide_handle.py @@ -29,8 +29,8 @@ def sanitize_device(cls, device: Optional[Device]): def backend_rgba2rgb(self, img) -> PILImage: return rgba2rgb_pil(img, self.background_color) - def __init__(self, fname: str, device_id: Optional[int] = None): - super().__init__(fname, device_id) + def __init__(self, fname: str, device_id: Optional[int] = None, num_threads: Optional[int] = 1): + super().__init__(fname, device_id, num_threads=num_threads) self.handle = openslide.OpenSlide(fname) self._has_bounding_box = True self._bounding_box = self.__get_bounding_box() diff --git a/rmm_log.txt b/rmm_log.txt new file mode 100644 index 0000000..46fb05f --- /dev/null +++ b/rmm_log.txt @@ -0,0 +1,2 @@ +[160761][01:45:16:367101][info ] ----- RMM LOG BEGIN [PTDS DISABLED] ----- +[160761][01:45:16:367132][error ] [A][Stream 0x1][Upstream 10000000000B][FAILURE maximum pool size exceeded]