diff --git a/.gitignore b/.gitignore index 495397d..e9a2dde 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ wheels/ *.egg-info/ *.egg *.whl +*.log .coverage* MANIFEST histoqc/tests/data/* @@ -24,4 +25,9 @@ htmlcov/ histoqc/_version.py environment.yml +*.svs +/histoqc/tests/data/ +.histoqc_pkg_data* +.fuse_hidden* +.pytest* diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py index 323fb1f..f5e1be8 100644 --- a/histoqc/AnnotationModule.py +++ b/histoqc/AnnotationModule.py @@ -1,7 +1,9 @@ import logging from typing import List, Tuple -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte +from histoqc.BaseImage import printMaskHelper, BaseImage +from histoqc.array_adapter import ArrayAdapter, Device +from skimage import io +from skimage.util import img_as_ubyte import os from pathlib import PurePosixPath, Path from shapely.geometry import Polygon @@ -47,13 +49,13 @@ def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollectio return np.array(mask) -def getParams(s, params): +def getParams(s: BaseImage, params): # read params - format: xml, json; file_path; suffix; ann_format = params.get("format", None) file_path = params.get("file_path", None) suffix = params.get("suffix", "") - # try use default value if the params are not provided + # try using default value if the params are not provided if not ann_format: # set default format ann_format = "xml" @@ -73,8 +75,12 @@ def getParams(s, params): return ann_format, file_path, suffix -def saveAnnotationMask(s, params): +def saveAnnotationMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetAnnotationMask") + # quite pointless to enforce GPU acceleration here. Force to use CPU mode + adaptor = ArrayAdapter.build(input_device=Device.build(Device.DEVICE_CPU), + output_device=Device.build(Device.DEVICE_CPU), + contingent_device=Device.build(Device.DEVICE_CPU)) (ann_format, file_path, suffix) = getParams(s, params) @@ -106,13 +112,14 @@ def saveAnnotationMask(s, params): (off_x, off_y, ncol, nrow) = s["img_bbox"] resize_factor = np.shape(s["img_mask_use"])[1] / ncol height, width = s["img_mask_use"].shape + annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0 mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png" io.imsave(mask_file_name, img_as_ubyte(annotationMask)) prev_mask = s["img_mask_use"] - s["img_mask_use"] = prev_mask & annotationMask + s["img_mask_use"] = adaptor.and_(prev_mask, annotationMask) s.addToPrintList("getAnnotationMask", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index 947f7c7..0a8ab55 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -1,26 +1,74 @@ +from __future__ import annotations import logging import os +import sys + import numpy as np -import zlib, dill +import zlib +import traceback +import dill from distutils.util import strtobool -from PIL import Image import re -from typing import Union, Tuple -#os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically -from histoqc.import_wrapper.openslide import openslide - -# there is no branch reset group in re -# compatible with the previous definition of valid input: leading zero and leading decimals are supported +from typing import Union, Tuple, cast, Optional +from 
histoqc.wsi_handles.base import WSIImageHandle +from histoqc.wsi_handles.constants import KEY_CUCIM +from histoqc.array_adapter.typing import TYPE_ARRAY +from histoqc.array_adapter import ArrayDeviceType _REGEX_MAG = r"^(\d?\.?\d*X?)" _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE) MAG_NA = None +# todo either document or regularize the fields of BaseImage +# class BaseImageData(TypedDict): +# warnings: List[str] +# output: List +# filename: str +# comments: str +# outdir: str +# dir: str +# # width, height +# image_base_size: Tuple[int, int] +# enable_bounding_box: bool +# image_work_size: str | float +# mask_statistics: str +# base_mag: Optional[float] +# img_mask_use: np.ndarray +# img_mask_force: List[str] +# completed: List[str] +# img_bbox: Tuple[int, int, int, int] + class BaseImage(dict): - def __init__(self, fname, fname_outdir, params): - dict.__init__(self) + _image_handle: Optional[WSIImageHandle] + _device_id: Optional[int] + @property + def image_handle(self) -> Optional[WSIImageHandle]: + if hasattr(self, "_image_handle"): + return self._image_handle + return None + + @image_handle.setter + def image_handle(self, image_handle: WSIImageHandle): + self._image_handle = image_handle + + def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None, num_threads: Optional[int] = 1): + dict.__init__(self) + # init + self._device_id = device_id + self._image_handle = None + handles = params.get("handles", KEY_CUCIM) + + # dynamically load wsi image handle + try: + self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, + handles, device_id=device_id, + num_threads=num_threads) + except Exception: + trace_string = traceback.format_exc() + logging.error(f"{__name__}: {fname} -- Error Creating Handle - Traceback: {trace_string}") + sys.exit(1) self.in_memory_compression = strtobool(params.get("in_memory_compression", "False")) self["warnings"] = [''] # this needs to be first key in case anything else wants to add to it @@ -33,27 +81,31 @@ def __init__(self, fname, fname_outdir, params): self["outdir"] = fname_outdir self["dir"] = os.path.dirname(fname) - self["os_handle"] = openslide.OpenSlide(fname) - self["image_base_size"] = self["os_handle"].dimensions - self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box","False")) - # check if the bbox if doesn't have bbox set enable_bounding_box to False + # get handles from config + + self["image_base_size"] = self.image_handle.dimensions + self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box", "False")) + # check: if it doesn't have bbox set enable_bounding_box to False self.setBBox() self.addToPrintList("image_bounding_box", self["img_bbox"]) self["image_work_size"] = params.get("image_work_size", "1.25x") self["mask_statistics"] = params.get("mask_statistics", "relative2mask") - + self["base_mag"] = getMag(self, params) if not self["base_mag"]: - logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?") - return -1 + logging.error( + f"{self['filename']}: Has unknown or uncalculated base magnification," + f" cannot specify magnification scale! 
Did you try getMag?") + return self.addToPrintList("base_mag", self["base_mag"]) mask_statistics_types = ["relative2mask", "absolute", "relative2image"] - if (self["mask_statistics"] not in mask_statistics_types): + if self["mask_statistics"] not in mask_statistics_types: logging.error( - f"mask_statistic type '{self['mask_statistics']}' is not one of the 3 supported options relative2mask, absolute, relative2image!") + f"mask_statistic type '{self['mask_statistics']}'" + f" is not one of the 3 supported options relative2mask, absolute, relative2image!") exit() self["img_mask_use"] = np.ones(self.getImgThumb(self["image_work_size"]).shape[0:2], dtype=bool) @@ -61,39 +113,49 @@ def __init__(self, fname, fname_outdir, params): self["completed"] = [] + @staticmethod + def is_img_data(key: str) -> bool: + return key.startswith("img") and key != "img_bbox" + + def _sync_to_handle(self, key, value, device: Optional[ArrayDeviceType] = None): + if not self.__class__.is_img_data(key): + return value + if hasattr(self, "_image_handle") and self.image_handle is not None: + device = device if device is not None else self.image_handle.device + value = self.image_handle.adapter.__class__.curate_arrays_device(value, + device=device, copy=False) + return value + def __getitem__(self, key): value = super(BaseImage, self).__getitem__(key) - if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key): value = dill.loads(zlib.decompress(value)) + + value = self._sync_to_handle(key, value) return value def __setitem__(self, key, value): - if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + value = self._sync_to_handle(key, value) + if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key): value = zlib.compress(dill.dumps(value), level=5) - - return super(BaseImage, self).__setitem__(key,value) + return super(BaseImage, self).__setitem__(key, value) # setbounding box start coordinate and size def setBBox(self): # add self["img_bbox"] = (x, y, width, heigh) - osh = self["os_handle"] + image_handle = self.image_handle # set default bbox - (dim_width, dim_height) = osh.dimensions + (dim_width, dim_height) = image_handle.dimensions self["img_bbox"] = (0, 0, dim_width, dim_height) # try to get bbox if bounding_box is ture - if self["enable_bounding_box"]: - # try get bbox from os handle properties - try: - x = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA')) - y = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA')) - width = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA')) - height = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA')) - self["img_bbox"] = (x, y, width, height) - except: - # no bbox info in slide set enable_bounding_box as Flase - self["enable_bounding_box"] = False - logging.warning(f"{self['filename']}: Bounding Box requested but could not read") - self["warnings"].append("Bounding Box requested but could not read") + + # Does WSI have bounding box + if self["enable_bounding_box"] and image_handle.has_bounding_box: + self["img_bbox"] = image_handle.bounding_box + elif self["enable_bounding_box"] and not image_handle.has_bounding_box: + self["enable_bounding_box"] = False + logging.warning(f"{self['filename']}: Bounding Box requested but could not read") + self["warnings"].append("Bounding 
Box requested but could not read") def addToPrintList(self, name, val): self[name] = val @@ -101,14 +163,14 @@ def addToPrintList(self, name, val): # find the next higher level by giving a downsample factor # return (level, isFindCloseLevel) - def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: - osh = self["os_handle"] - relative_down_factors_idx=[np.isclose(i/downsample_factor,1,atol=.01) for i in osh.level_downsamples] - level=np.where(relative_down_factors_idx)[0] + def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: + osh = self.image_handle + relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in osh.level_downsamples] + level = np.where(relative_down_factors_idx)[0] if level.size: - return (level[0], True) + return cast(int, level[0]), True else: - return (osh.get_best_level_for_downsample(downsample_factor), False) + return osh.get_best_level_for_downsample(downsample_factor), False @staticmethod def is_valid_size(size: str): @@ -123,7 +185,7 @@ def validate_slide_size(size: str, assertion: bool = False): # for now just cast it to str return size - def getImgThumb(self, size: str): + def getImgThumb(self, size: str) -> Optional[TYPE_ARRAY]: # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't # really handle it that way, and trace of previous coding also suggest that there actually lack a params # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here. @@ -133,9 +195,9 @@ def getImgThumb(self, size: str): # return the img if it exists if key in self: return self[key] - + # get open slide handle - osh = self["os_handle"] + image_handle = self.image_handle # get the size of view on current img - the current size of view by using the bounding box. # bounding box could be the size of whole img or read the size from the slide mate data. @@ -157,11 +219,12 @@ def getImgThumb(self, size: str): # magnification base_mag = self["base_mag"] target_sampling_factor = base_mag / target_mag - target_dims = tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int)) - + target_dims = cast(Tuple[int, int], + tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int))) + # generate the thumb img - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) - + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) + # the size of the img is number elif size.replace(".", "0", 1).isdigit(): size = float(size) @@ -169,30 +232,35 @@ def getImgThumb(self, size: str): if size < 1: target_downscaling_factor = size target_sampling_factor = 1 / target_downscaling_factor - target_dims = tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int)) - + target_dims = cast(Tuple[int, int], + tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int))) + # generate the thumb img - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) # specifies a desired level of open slide elif size < 100: target_level = int(size) - if target_level >= osh.level_count: - target_level = osh.level_count - 1 - msg = f"Desired Image Level {size+1} does not exist! Instead using level {osh.level_count-1}! 
Downstream output may not be correct" - logging.error(f"{self['filename']}: {msg}" ) + if target_level >= image_handle.level_count: + target_level = image_handle.level_count - 1 + msg = (f"Desired Image Level {size + 1} does not exist!" + f" Instead using level {image_handle.level_count - 1}! Downstream output may not be correct") + logging.error(f"{self['filename']}: {msg}") self["warnings"].append(msg) - size = (tuple((np.array(img_base_size)/osh.level_downsamples[target_level]).astype(int)) + size = (tuple((np.array(img_base_size) / image_handle.level_downsamples[target_level]).astype(int)) if self["enable_bounding_box"] - else osh.level_dimensions[target_level]) + else image_handle.level_dimensions[target_level]) logging.info( - f"{self['filename']} - \t\tloading image from level {target_level} of size {osh.level_dimensions[target_level]}") - tile = osh.read_region((bx, by), target_level, size) - self[key] = (np.asarray(rgba2rgb(self, tile)) - if np.shape(tile)[-1]==4 - else np.asarray(tile)) - - # specifies a desired size of thumbnail + f"{self['filename']} - \t\tloading image from level {target_level} of size" + f" {image_handle.level_dimensions[target_level]}") + # PILLOW + tile = image_handle.read_region((bx, by), target_level, size) + + self[key] = (np.asarray(self.image_handle.backend_rgba2rgb(tile)) + if len(tile.getbands()) == 4 + else np.asarray(tile)) + + # specifies a desired size of thumbnail else: # recommend having the dimension is less than 10k if size > 10000: @@ -202,89 +270,19 @@ def getImgThumb(self, size: str): self["warnings"].append(msg) target_dims = getDimensionsByOneDim(self, int(size)) target_sampling_factor = img_base_size[0] / target_dims[0] - self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor) return self[key] -def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float): - osh = s["os_handle"] - - # get thumb from og - if not s["enable_bounding_box"]: - max_dim = dims[0] if dims[0] > dims[1] else dims[1] - return np.array(osh.get_thumbnail((max_dim, max_dim))) - - (level, isExactLevel) = s.getBestLevelForDownsample(target_sampling_factor) - - # check if get the existing level - if isExactLevel: - tile = osh.read_region((x, y), level, dims) - return np.asarray(rgba2rgb(s, tile)) if np.shape(tile)[-1]==4 else np.asarray(tile) - # scale down the thumb img from the next high level - else: - return resizeTileDownward(s, target_sampling_factor, level) - -''' -the followings are helper functions -''' -def resizeTileDownward(self, target_downsampling_factor, level): - osh = self["os_handle"] - (bx, by, bwidth, bheight) = self["img_bbox"] - end_x = bx + bwidth - end_y = by + bheight - - cloest_downsampling_factor = osh.level_downsamples[level] - win_size = 2048 - - # create a new img - output = [] - for x in range(bx, end_x, win_size): - row_piece = [] - for y in range(by, end_y, win_size): - win_width, win_height = [win_size] * 2 - # Adjust extraction size for endcut - if end_x < x + win_width: - win_width = end_x - x - if end_y < y + win_height: - win_height = end_y - y - - - win_down_width = int(round(win_width / target_downsampling_factor)) - win_down_height = int(round(win_height / target_downsampling_factor)) - - win_width = int(round(win_width / cloest_downsampling_factor)) - win_height = int(round(win_height / cloest_downsampling_factor)) - - # TODO Note: this isn't very efficient, and if more efficiency 
isneeded - # We should likely refactor using "paste" from Image. - # Or even just set the pixels directly with indexing. - cloest_region = osh.read_region((x, y), level, (win_width, win_height)) - if np.shape(cloest_region)[-1]==4: - cloest_region = rgba2rgb(self, cloest_region) - target_region = cloest_region.resize((win_down_width, win_down_height)) - row_piece.append(target_region) - row_piece = np.concatenate(row_piece, axis=0) - - output.append(row_piece) - output = np.concatenate(output, axis=1) - return output - - -def rgba2rgb(s: BaseImage, img): - bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff") - thumb = Image.new("RGB", img.size, bg_color) - thumb.paste(img, None, img) - return thumb - - -def printMaskHelper(type: str, prev_mask, curr_mask): - if type == "relative2mask": + +def printMaskHelper(statistic_type: str, prev_mask, curr_mask): + if statistic_type == "relative2mask": if len(prev_mask.nonzero()[0]) == 0: return str(-100) else: return str(1 - len(curr_mask.nonzero()[0]) / len(prev_mask.nonzero()[0])) - elif type == "relative2image": + elif statistic_type == "relative2image": return str(len(curr_mask.nonzero()[0]) / np.prod(curr_mask.shape)) - elif type == "absolute": + elif statistic_type == "absolute": return str(len(curr_mask.nonzero()[0])) else: return str(-1) @@ -306,29 +304,23 @@ def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]: return MAG_NA # regex determines X must either be abscent or at the end of the string if "X" in mag.upper(): - mag = mag[0:-1] + mag = mag[0: -1] return float(mag) # this function is seperated out because in the future we hope to have automatic detection of # magnification if not present in open slide, and/or to confirm openslide base magnification def getMag(s: BaseImage, params) -> Union[float, None]: - logging.info(f"{s['filename']} - \tgetMag") - osh = s["os_handle"] - mag = osh.properties.get("openslide.objective-power") or \ - osh.properties.get("aperio.AppMag") or MAG_NA - # if mag or strtobool(params.get("confirm_base_mag", "False")): - # # do analysis work here - # logging.warning(f"{s['filename']} - Unknown base magnification for file") - # s["warnings"].append(f"{s['filename']} - Unknown base magnification for file") - # return None - # else: + osh = s.image_handle + mag = osh.magnification or MAG_NA # workaround for unspecified mag -- with or without automatic detection it might be preferred to have # mag predefined mag = mag or parsed_mag(params.get("base_mag")) # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise # the exception here. 
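For reference, `parsed_mag` (shown above) is what normalizes user-supplied magnification strings before this cast. A minimal usage sketch — the concrete values are illustrative, assuming inputs that pass the module's `_PATTERN_MAG` validation and the early `MAG_NA` return for missing values visible above:

    parsed_mag("20X")    # -> 20.0 (a trailing X/x is stripped before float())
    parsed_mag("1.25x")  # -> 1.25
    parsed_mag(None)     # -> MAG_NA (None), so the float() cast is skipped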
- return float(mag) if mag is not MAG_NA else MAG_NA + mag = float(mag) if mag is not MAG_NA else MAG_NA + logging.info(f"{s['filename']} - \tgetMag = {mag}") + return mag def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]: diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py index 418c715..94cfe40 100644 --- a/histoqc/BasicModule.py +++ b/histoqc/BasicModule.py @@ -2,38 +2,48 @@ import os from histoqc.BaseImage import printMaskHelper from skimage.morphology import remove_small_objects, binary_opening, disk -from skimage import io, color, img_as_ubyte +from skimage.util import img_as_ubyte +from histoqc.BaseImage import BaseImage -import matplotlib.pyplot as plt - -def getBasicStats(s, params): +def getBasicStats(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetBasicStats") - osh = s["os_handle"] - s.addToPrintList("type", osh.properties.get("openslide.vendor", "NA")) - s.addToPrintList("levels", osh.properties.get("openslide.level-count", "NA")) - s.addToPrintList("height", osh.properties.get("openslide.level[0].height", "NA")) - s.addToPrintList("width", osh.properties.get("openslide.level[0].width", "NA")) - s.addToPrintList("mpp_x", osh.properties.get("openslide.mpp-x", "NA")) - s.addToPrintList("mpp_y", osh.properties.get("openslide.mpp-y", "NA")) - s.addToPrintList("comment", osh.properties.get("openslide.comment", "NA").replace("\n", " ").replace("\r", " ")) + osh = s.image_handle + s.addToPrintList("type", osh.vendor) + s.addToPrintList("levels", osh.level_count) + s.addToPrintList("height", osh.dimensions[1] if len(osh.dimensions) >= 2 else "NA") + s.addToPrintList("width", osh.dimensions[0] if len(osh.dimensions) >= 2 else "NA") + s.addToPrintList("mpp_x", osh.mpp_x) + s.addToPrintList("mpp_y", osh.mpp_y) + comment = osh.comment if osh.comment else "" + s.addToPrintList("comment", comment.replace("\n", " ").replace("\r", " ")) return -def finalComputations(s, params): +def finalComputations(s: BaseImage, params): mask = s["img_mask_use"] s.addToPrintList("pixels_to_use", str(len(mask.nonzero()[0]))) -def finalProcessingSpur(s, params): +def finalProcessingSpur(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingSpur") + + adapter = s.image_handle.adapter disk_radius = int(params.get("disk_radius", "25")) - selem = disk(disk_radius) - mask = s["img_mask_use"] - mask_opened = binary_opening(mask, selem) - mask_spur = ~mask_opened & mask - io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", img_as_ubyte(mask_spur)) + # selem = adapter(disk)(disk_radius) + mask = s["img_mask_use"] + mask_opened = adapter(binary_opening)(mask, footprint=disk(disk_radius)) + # todo: it is safe to directly compare + # todo: ~mask_opened & mask directly as the device of both are synchronized by adapter. 
+ # todo: but this assumes that an adapter is used in the module so + # todo: for now unless we implement an array proxy the best practice is to use explicit and_ method + # todo: to avoid mistakes + mask_spur = adapter.and_(~mask_opened, mask) + fname = os.path.join(s["outdir"], f"{s['filename']}_spur.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_spur)) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", ArrayAdapter.move_to_device(mask_spur_ubyte, + # ArrayDevice.CPU)) prev_mask = s["img_mask_use"] s["img_mask_use"] = mask_opened @@ -43,20 +53,25 @@ def finalProcessingSpur(s, params): if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BasicModule.finalProcessingSpur" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"After BasicModule.finalProcessingSpur" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") -def finalProcessingArea(s, params): +def finalProcessingArea(s: BaseImage, params): logging.info(f"{s['filename']} - \tfinalProcessingArea") + + adapter = s.image_handle.adapter area_thresh = int(params.get("area_threshold", "1000")) mask = s["img_mask_use"] - mask_opened = remove_small_objects(mask, min_size=area_thresh) - mask_removed_area = ~mask_opened & mask - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area)) + mask_opened = adapter(remove_small_objects)(mask, min_size=area_thresh) + mask_removed_area = adapter.and_(~mask_opened, mask) + fname = os.path.join(s["outdir"], f"{s['filename']}_areathresh.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_removed_area)) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area)) prev_mask = s["img_mask_use"] s["img_mask_use"] = mask_opened > 0 @@ -66,6 +81,8 @@ def finalProcessingArea(s, params): if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BasicModule.finalProcessingArea" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"After BasicModule.finalProcessingArea" + f" NO tissue remains detectable! 
Downstream modules likely to be incorrect/fail") diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py index 8c55890..4e173c6 100644 --- a/histoqc/BlurDetectionModule.py +++ b/histoqc/BlurDetectionModule.py @@ -2,64 +2,74 @@ import os import skimage -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte, morphology, measure +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import morphology, measure +from skimage.util import img_as_ubyte from skimage.color import rgb2gray -from skimage.filters import rank import numpy as np -import matplotlib.pyplot as plt - - # Analysis of focus measure operators for shape-from-focus # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012 # https://pdfs.semanticscholar.org/8c67/5bf5b542b98bf81dcf70bd869ab52ab8aae9.pdf -def identifyBlurryRegions(s, params): +def identifyBlurryRegions(s: BaseImage, params): logging.info(f"{s['filename']} - \tidentifyBlurryRegions") - + adapter = s.image_handle.adapter blur_radius = int(params.get("blur_radius", 7)) blur_threshold = float(params.get("blur_threshold", .1)) + img_thumb = s.getImgThumb(params.get("image_work_size", "2.5x")) + + img = adapter(rgb2gray)(img_thumb) + # use the __abs__ interface + + logging.debug(f"{s['filename']} - \tidentifyBlurryRegions Gray:" + f" {img.max(), img.min(), blur_radius, blur_threshold}") + + img_laplace = abs(adapter(skimage.filters.laplace)(img)) - img = s.getImgThumb(params.get("image_work_size", "2.5x")) - img = rgb2gray(img) - img_laplace = np.abs(skimage.filters.laplace(img)) - mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold + logging.debug(f"{s['filename']} - \tidentifyBlurryRegions img_laplace: {img_laplace.max(), img_laplace.min()}") + mask = adapter(skimage.filters.gaussian)(img_laplace, sigma=blur_radius) <= blur_threshold - mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, - 1] # for some reason resize takes a grayscale and produces a 3chan - mask = s["img_mask_use"] & (mask > 0) + # for some reason resize takes a grayscale and produces a 3chan + # Note: the reason you obtain a 3chan is that you specified a 3chan output shape + mask_resized_shape = s.getImgThumb(s["image_work_size"]).shape[:2] + mask = adapter(skimage.transform.resize)(mask, output_shape=mask_resized_shape, order=0) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask)) + mask = adapter.and_(s["img_mask_use"], mask > 0) + + fname = os.path.join(s["outdir"], f"{s['filename']}_blurry.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) s["img_mask_blurry"] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_blurry"] + s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_blurry"]) + + labeled_mask = adapter(morphology.label)(mask) + rps = adapter(measure.regionprops)(labeled_mask) - rps = measure.regionprops(morphology.label(mask)) if rps: - areas = np.asarray([rp.area for rp in rps]) + # scalar stats --> CPU is sufficient. 
+ # use float to cast cp.array(scalar) to python's float + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() else: nobj = area_max = area_mean = 0 - - s.addToPrintList("blurry_removed_num_regions", str(nobj)) s.addToPrintList("blurry_removed_mean_area", str(area_mean)) s.addToPrintList("blurry_removed_max_area", str(area_max)) - s.addToPrintList("blurry_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning( - f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions " + f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append( - f"After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") - + f"After BlurDetectionModule.identifyBlurryRegions" + f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail") return diff --git a/histoqc/BrightContrastModule.py b/histoqc/BrightContrastModule.py index 89dbfbd..a6e1ebe 100644 --- a/histoqc/BrightContrastModule.py +++ b/histoqc/BrightContrastModule.py @@ -3,9 +3,46 @@ from skimage.filters import sobel from skimage.color import convert_colorspace, rgb2gray from distutils.util import strtobool +from histoqc.BaseImage import BaseImage +from histoqc.array_adapter.typing import TYPE_ARRAY +_EPS = np.finfo(np.float32).eps -def getBrightnessGray(s, params): + +def _rms(img: TYPE_ARRAY) -> float: + """Root Mean Square contrast for non-empty masked images + Args: + img: Input image or a vector yielded from masked image. Not Tissue-less + Returns: + + """ + assert img.size > 0 + err = (img - img.mean()) ** 2 + return float(np.sqrt(err.sum() / img.size)) + + +def _michelson(img: TYPE_ARRAY) -> float: + """Helper function for non-empty masked input + Args: + img: + Returns: + + """ + assert img.size > 0 + max_img = img.max() + min_img = img.min() + # add eps to avoid nan when max and min are both 0. 
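+    # For reference: Michelson contrast is (I_max - I_min) / (I_max + I_min),
+    # bounded in [0, 1] for non-negative intensities; with the guard below an
+    # all-zero patch yields 0 / _EPS = 0.0 instead of NaN.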
+ denominator = max_img + min_img + denominator = denominator if denominator != 0 else denominator + _EPS + return float((max_img - min_img) / denominator) + + +def _tenengrad_from_sobel2(sobel_img2: TYPE_ARRAY): + return np.sqrt(sobel_img2.sum()) / sobel_img2.size + + +def getBrightnessGray(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix+"_" if prefix else "" logging.info(f"{s['filename']} - \tgetContrast:{prefix}") @@ -16,11 +53,11 @@ def getBrightnessGray(s, params): img = s.getImgThumb(s["image_work_size"]) - img_g = rgb2gray(img) + img_g = adapter(rgb2gray)(img) if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] - + mask = adapter.sync(mask) img_g = img_g[mask] if img_g.size == 0: img_g = np.array(-100) @@ -31,7 +68,8 @@ def getBrightnessGray(s, params): return -def getBrightnessByChannelinColorSpace(s, params): +def getBrightnessByChannelinColorSpace(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -43,19 +81,19 @@ def getBrightnessByChannelinColorSpace(s, params): invert = strtobool(params.get("invert", "False")) - img = s.getImgThumb(s["image_work_size"]) suffix = "" - if (to_color_space != "RGB"): - img = convert_colorspace(img, "RGB", to_color_space) + if to_color_space != "RGB": + img = adapter(convert_colorspace)(img, fromspace="RGB", tospace=to_color_space) suffix = "_" + to_color_space for chan in range(0, 3): vals = img[:, :, chan] - if (limit_to_mask): + if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] + mask = adapter.sync(mask) vals = vals[mask] if vals.size == 0: @@ -67,7 +105,8 @@ def getBrightnessByChannelinColorSpace(s, params): return -def getContrast(s, params): +def getContrast(s: BaseImage, params): + adapter = s.image_handle.adapter prefix = params.get("prefix", None) prefix = prefix + "_" if prefix else "" @@ -77,27 +116,26 @@ def getContrast(s, params): invert = strtobool(params.get("invert", "False")) - img = s.getImgThumb(s["image_work_size"]) - img = rgb2gray(img) - - sobel_img = sobel(img) ** 2 + img = adapter(rgb2gray)(img) + # noinspection PyTypeChecker + sobel_img2 = adapter(sobel)(img) ** 2 + mask = None if limit_to_mask: mask = s[mask_name] if not invert else ~s[mask_name] - - sobel_img = sobel_img[mask] + img, sobel_img2, mask = adapter.device_sync_all(img, sobel_img2, mask) + sobel_img2 = sobel_img2[mask] img = img[s["img_mask_use"]] - if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to - # produce metrics for completeness with warning + if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to + # produce metrics for completeness with warning - s.addToPrintList(f"{prefix}tenenGrad_contrast", str(-100)) + s.addToPrintList(f"{prefix}tenen_grad_contrast", str(-100)) s.addToPrintList(f"{prefix}michelson_contrast", str(-100)) s.addToPrintList(f"{prefix}rms_contrast", str(-100)) - logging.warning(f"{s['filename']} - After BrightContrastModule.getContrast: NO tissue " f"detected, statistics are impossible to compute, defaulting to -100 !") s["warnings"].append(f"After BrightContrastModule.getContrast: NO tissue remains " @@ -105,19 +143,19 @@ def getContrast(s, params): return - # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask - tenenGrad_contrast = np.sqrt(np.sum(sobel_img)) / img.size - 
s.addToPrintList(f"{prefix}tenenGrad_contrast", str(tenenGrad_contrast)) + + # np.sqrt(sobel_img2.sum()) / sobel_img2.size + # np.sqrt(np.sum(sobel_img)) / img.size + tenen_grad_contrast = _tenengrad_from_sobel2(sobel_img2=sobel_img2) + + s.addToPrintList(f"{prefix}tenen_grad_contrast", str(tenen_grad_contrast)) # Michelson contrast - max_img = img.max() - min_img = img.min() - contrast = (max_img - min_img) / (max_img + min_img) + contrast = _michelson(img) s.addToPrintList(f"{prefix}michelson_contrast", str(contrast)) # RMS contrast - rms_contrast = np.sqrt(pow(img - img.mean(), 2).sum() / img.size) + rms_contrast = _rms(img) s.addToPrintList(f"{prefix}rms_contrast", str(rms_contrast)) - return diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py index afd62f5..da253c5 100644 --- a/histoqc/BubbleRegionByRegion.py +++ b/histoqc/BubbleRegionByRegion.py @@ -1,50 +1,36 @@ import logging import os -import sys -from ast import literal_eval as make_tuple - -from distutils.util import strtobool from histoqc.BaseImage import printMaskHelper +from scipy.signal import convolve2d -import scipy.signal - -from skimage import io, img_as_ubyte -from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from skimage.util import img_as_ubyte +from skimage.filters import frangi from skimage.color import rgb2gray -from skimage.morphology import remove_small_objects, disk, binary_opening -from skimage.feature import local_binary_pattern - -from skimage.transform import rescale, resize, downscale_local_mean - -from math import ceil - -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier - -from skimage import io, color - - +from skimage.morphology import remove_small_objects +from histoqc.BaseImage import BaseImage +from skimage import color import numpy as np -import matplotlib.pyplot as plt global_holder = {} -#WARNING: Not as robust as other modules -def roiWise(s, params): + +# WARNING: Not as robust as other modules +def roiWise(s: BaseImage, params): + adapter = s.image_handle.adapter name = params.get("name", "classTask") print("\tpixelWise:\t", name, end="") - level = int(params.get("level", 1)) - win_size = int(params.get("win_size", 2048)) #the size of the ROI which will be iteratively considered + win_size = int(params.get("win_size", 2048)) # the size of the ROI which will be iteratively considered - osh = s["os_handle"] + osh = s.image_handle dim_base = osh.level_dimensions[0] + level = min(int(params.get("level", 1)), len(osh.level_dimensions) - 1) dims = osh.level_dimensions[level] - ratio_x = dim_base[0] / dims[0] #figure out the difference between desi + ratio_x = dim_base[0] / dims[0] # figure out the difference between desi ratio_y = dim_base[1] / dims[1] frangi_scale_range = (1, 6) @@ -58,71 +44,73 @@ def roiWise(s, params): row_piece = [] print('.', end='', flush=True) for y in range(0, dim_base[1], round(win_size * ratio_y)): - region = np.asarray(osh.read_region((x, y), 1, (win_size, win_size))) - region = region[:, :, 0:3] # remove alpha channel - g = rgb2gray(region) - feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges) + # todo: confirm -- the original level is hardcoded to be 1, shouldn't it be the level variable? + + region = osh.region_backend((x, y), level, (win_size, win_size)) + region = osh.backend_to_array(region, osh.device)[..., :3] + g = adapter(rgb2gray)(region) + # todo -- forward compatibility. 
Later version of frangi alters the signatures + sigmas = frangi_scale_range + (frangi_scale_step,) + feat = adapter(frangi)(g, sigmas=sigmas, beta=frangi_beta1, + gamma=frangi_beta2, black_ridges=frangi_black_ridges) + feat = feat / 8.875854409275627e-08 - region_mask = np.bitwise_and(g < .3, feat > 5) - region_mask = remove_small_objects(region_mask, min_size=100, in_place=True) - # region_std = region.std(axis=2) - # region_gray = rgb2gray(region) - # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255) - # region_mask = scipy.ndimage.morphology.binary_dilation(region_mask, iterations=1) - # region_mask = resize(region_mask , (region_mask.shape[0] / 2, region_mask.shape[1] / 2)) + region_mask = adapter.and_(g < .3, feat > 5) + region_mask = adapter(remove_small_objects)(region_mask, min_size=100) row_piece.append(region_mask) + # sanity check: force to synchronize the device + row_piece = adapter.device_sync_all(*row_piece) row_piece = np.concatenate(row_piece, axis=0) - mask.append(row_piece) + mask.append(row_piece) + mask = adapter.device_sync_all(*mask) mask = np.concatenate(mask, axis=1) if params.get("area_threshold", "") != "": - mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), in_place=True) + # forward compatibility + # inplace=True is equivalent to out=mask. Therefore, it is removed in future version + mask = adapter(remove_small_objects)(mask, min_size=int(params.get("area_threshold", "")), out=mask) s.addToPrintList(name, str(mask.mean())) - #TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask - #s.addToPrintList(name, - # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) #.astype(np.uint8) * 255) - + fname = os.path.join(s["outdir"], f"{s['filename']}_BubbleBounds.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) return -def detectSmoothness(s, params): - logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") - thresh = float(params.get("threshold", ".01" )) - kernel_size = int(params.get("kernel_size", "10")) - min_object_size = int(params.get("min_object_size", "100")) +def detectSmoothness(s: BaseImage, params): - img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) + adapter = s.image_handle.adapter + logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") + thresh = float(params.get("threshold", ".01")) + kernel_size = int(params.get("kernel_size", "10")) + min_object_size = int(params.get("min_object_size", "100")) + img = s.getImgThumb(s["image_work_size"]) + img = adapter(color.rgb2gray)(img) + avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) - imf = scipy.signal.convolve2d(img, avg, mode="same") - mask_flat = abs(imf - img) < thresh + imf = adapter(convolve2d)(img, in2=avg, mode="same") - mask_flat = remove_small_objects(mask_flat, min_size=min_object_size) - mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size) + mask_flat = abs(imf - img) < thresh - prev_mask = s["img_mask_use"] - s["img_mask_flat"] = mask_flat + mask_flat = adapter(remove_small_objects)(mask_flat, min_size=min_object_size) + mask_flat = ~adapter(remove_small_objects)(~mask_flat, min_size=min_object_size) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask)) + 
prev_mask = s["img_mask_use"] + s["img_mask_flat"] = mask_flat + fname = os.path.join(s["outdir"], f"{s['filename']}_flat.png") + flat_out = adapter.and_(mask_flat, prev_mask) + adapter.imsave(fname, adapter(img_as_ubyte)(flat_out)) - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] + s.addToPrintList("flat_areas", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, + s["img_mask_use"])) - s.addToPrintList("flat_areas", - printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, - s["img_mask_use"])) - - if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty - logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue " - f"remains detectable! Downstream modules likely to be incorrect/fail") - s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " - f"detectable! Downstream modules likely to be incorrect/fail") - - return - + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + return diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 9567a49..ef72dae 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -2,58 +2,62 @@ import os import re import sys - +from histoqc.array_adapter import ArrayAdapter, Device from ast import literal_eval as make_tuple - +from dask.distributed import Lock from distutils.util import strtobool -from histoqc.BaseImage import printMaskHelper -from skimage import io, img_as_ubyte, img_as_bool -from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io +from skimage.util import img_as_ubyte, img_as_bool +from skimage.filters import gabor, frangi, gaussian, median, laplace from skimage.color import rgb2gray from skimage.morphology import remove_small_objects, disk, dilation from skimage.feature import local_binary_pattern -from scipy import ndimage as ndi - from sklearn.naive_bayes import GaussianNB from sklearn.ensemble import RandomForestClassifier import numpy as np -import matplotlib.pyplot as plt - -def pixelWise(s, params): +def pixelWise(s: BaseImage, params): name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tpixelWise:\t", name) thresh = float(params.get("threshold", .5)) fname = params.get("tsv_file", "") + if fname == "": logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name) sys.exit(1) - return + model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1) - img = s.getImgThumb(s["image_work_size"]) + # todo no formal support for GNB now + # todo Possible solution: sklearn with array-api-compat and implement a wrapper into the ArrayAdaptor + # todo Also: need to rework the GaussianNB.fit interface into a wrapper. 
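As background for the todos above: the pixel-wise route is just a Gaussian naive Bayes classifier fit on per-pixel RGB samples, with the positive-class probability thresholded. A minimal CPU-only sketch with toy values (the tsv layout — label in column 0, RGB in columns 1-3 — mirrors the loadtxt call above; the values themselves are made up):

    import numpy as np
    from sklearn.naive_bayes import GaussianNB

    # toy training rows: [label, R, G, B], analogous to model_vals from np.loadtxt
    model_vals = np.array([[0, 240, 240, 240],
                           [0, 235, 230, 238],
                           [1, 120,  60,  90],
                           [1, 110,  55,  85]], dtype=float)
    gnb = GaussianNB()
    gnb.fit(model_vals[:, 1:], model_vals[:, 0])

    img = np.random.randint(0, 256, size=(8, 8, 3)).astype(float)
    cal = gnb.predict_proba(img.reshape(-1, 3))        # shape (H*W, 2)
    mask = cal[:, 1].reshape(img.shape[:2]) > 0.5      # positive-class threshold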
+ device = s.image_handle.device_type + adapter: ArrayAdapter = s.image_handle.adapter + img = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]), + device=Device.build(Device.DEVICE_CPU)) gnb = GaussianNB() - gnb.fit(model_vals[:, 1:], model_vals[:, 0]) - cal = gnb.predict_proba(img.reshape(-1, 3)) + adapter(gnb.fit)(model_vals[:, 1:], model_vals[:, 0]) + cal = adapter(gnb.predict_proba)(img.reshape(-1, 3)) cal = cal.reshape(img.shape[0], img.shape[1], 2) mask = cal[:, :, 1] > thresh - mask = s["img_mask_use"] & (mask > 0) + mask = adapter.and_(s["img_mask_use"], mask > 0) s.addToPrintList(name, str(mask.mean())) - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, img_as_ubyte(mask)) s["img_mask_" + name] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name]) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -70,59 +74,83 @@ def pixelWise(s, params): # extract_patches_2d(image, patch_size, max_patches=None, random_state=None def compute_rgb(img, params): - return img + adapter = params["adapter"] + return adapter.sync(img) def compute_laplace(img, params): laplace_ksize = int(params.get("laplace_ksize", 3)) - return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] + adapter = params["adapter"] + logging.debug(f"{__name__} - NaN check laplace img: {np.isnan(img).any()}") + # adapter.imsave(f"~/dbg/{params['filename']}_img_laplace.png", img) + img_gray = adapter(rgb2gray)(img) + # adapter.imsave(f"~/dbg/{params['filename']}_gray_laplace.png", adapter(img_as_ubyte)(img_gray)) + # return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] + logging.debug(f"{__name__} - NaN check laplace gray: {np.isnan(img_gray).any()}, {type(img)}") + feat = adapter(laplace)(img_gray, ksize=laplace_ksize) + logging.debug(f"{__name__} - NaN check laplace feat: {np.isnan(feat).any()}") + return feat[:, :, None] def compute_lbp(img, params): lbp_radius = float(params.get("lbp_radius", 3)) lbp_points = int(params.get("lbp_points", 24)) # example sets radius * 8 lbp_method = params.get("lbp_method", "default") - - return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] + # todo: currently no LBP implemented + adapter: ArrayAdapter = params['adapter'] + img_gray = adapter(rgb2gray)(img) + # return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] + return adapter(local_binary_pattern)(img_gray, P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] def compute_gaussian(img, params): + adapter = params["adapter"] gaussian_sigma = int(params.get("gaussian_sigma", 1)) - gaussian_multichan = strtobool(params.get("gaussian_multichan", False)) - - if (gaussian_multichan): - return gaussian(img, sigma=gaussian_sigma, multichannel=gaussian_multichan) + gaussian_multichan = strtobool(params.get("gaussian_multichan", "False")) + # todo: forward compatibility + # todo: after 0.19 default multichannel behavior is fixed and explicitly setting channel_axis is preferred. 
+    # todo: multichannel is also deprecated in later versions
+    if gaussian_multichan:
+        return adapter(gaussian)(img, sigma=gaussian_sigma, channel_axis=-1)
     else:
-        return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None]
+        img_gray = adapter(rgb2gray)(img)
+        return adapter(gaussian)(img_gray, sigma=gaussian_sigma)[:, :, None]


 def compute_median(img, params):
     median_disk_size = int(params.get("median_disk_size", 3))
-    return median(rgb2gray(img), selem=disk(median_disk_size))[:, :, None]
+    # starting from 0.19, selem is deprecated and footprint is preferred.
+    adapter: ArrayAdapter = params['adapter']
+    imgg = adapter(rgb2gray)(img)
+    footprint = adapter(disk)(median_disk_size)
+    return adapter(median)(imgg, footprint=footprint)[:, :, None]


 def compute_gabor(img, params):
-    if not params["shared_dict"].get("gabor_kernels", False):
-        gabor_theta = int(params.get("gabor_theta", 4))
-        gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)"))
-        gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)"))
-
-        kernels = []
-        for theta in range(gabor_theta):
-            theta = theta / 4. * np.pi
-            for sigma in gabor_sigma:
-                for frequency in gabor_frequency:
-                    kernel = np.real(gabor_kernel(frequency, theta=theta,
-                                                  sigma_x=sigma, sigma_y=sigma))
-                    kernels.append(kernel)
-        params["shared_dict"]["gabor_kernels"] = kernels
-
-    kernels = params["shared_dict"]["gabor_kernels"]
-    imgg = rgb2gray(img)
-    feats = np.zeros((imgg.shape[0], imgg.shape[1], len(kernels)), dtype=np.double)
-    for k, kernel in enumerate(kernels):
-        filtered = ndi.convolve(imgg, kernel, mode='wrap')
-        feats[:, :, k] = filtered
+    adapter = params["adapter"]
+    # if not params["shared_dict"].get("gabor_kernels", False):
+    # todo: the benefit of caching the gabor_kernel is marginal, as the computational overhead
+    # todo: of obtaining the kernel itself is negligible
+    gabor_theta = int(params.get("gabor_theta", 4))
+    gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)"))
+    gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)"))
+
+    # kernels = []
+    fts = []
+    for theta in range(gabor_theta):
+        theta = theta / 4. 
* np.pi + for sigma in gabor_sigma: + for frequency in gabor_frequency: + + fts.append((frequency, theta, sigma)) + + imgg = adapter(rgb2gray)(img) + feats = np.zeros((imgg.shape[0], imgg.shape[1], len(fts)), dtype=np.double) + feats = adapter.sync(feats) + + for idx, (freq, tht, sig) in enumerate(fts): + filtered, _ = adapter(gabor)(imgg, theta=tht, sigma_x=sig, sigma_y=sig, frequency=freq, mode='wrap') + feats[:, :, idx] = filtered return feats @@ -132,22 +160,115 @@ def compute_frangi(img, params): frangi_beta1 = float(params.get("frangi_beta1", .5)) frangi_beta2 = float(params.get("frangi_beta2", 15)) frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True")) - feat = frangi(rgb2gray(img), scale_range = frangi_scale_range, scale_step =frangi_scale_step, beta =frangi_beta1, gamma=frangi_beta2, black_ridges =frangi_black_ridges) + sigmas = frangi_scale_range + (frangi_scale_step,) + + adapter: ArrayAdapter = params["adapter"] + # adapter.imsave(f"~/dbg/{params['filename']}_img_frangi.png", img) + logging.debug(f"{__name__} - NaN check frangi img: {np.isnan(img).any()}") + img_gray = adapter(rgb2gray)(img) + # adapter.imsave(f"~/dbg/{params['filename']}_gray_frangi.png", adapter(img_as_ubyte)(img_gray)) + logging.debug(f"{__name__} - NaN check frangi gray: {np.isnan(img_gray).any()}") + feat = adapter(frangi)(img_gray, sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, + black_ridges=frangi_black_ridges) + # feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, + # black_ridges=frangi_black_ridges) + logging.debug(f"{__name__} - NaN check frangi feat: {np.isnan(feat).any()}") return feat[:, :, None] # add singleton dimension def compute_features(img, params): features = params.get("features", "") - + adapter = params["adapter"] feats = [] for feature in features.splitlines(): func = getattr(sys.modules[__name__], f"compute_{feature}") feats.append(func(img, params)) - + feats = adapter.device_sync_all(*feats) + # cupy can be implicitly concatenated using np's API return np.concatenate(feats, axis=2) -def byExampleWithFeatures(s, params): +def new_clf_model(s: BaseImage, params): + name = params.get("name", "classTask") + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}") + adapter = s.image_handle.adapter + model_vals = [] + model_labels = adapter.sync(np.empty([0, 1])) + nsamples_per_example = float(params.get("nsamples_per_example", -1)) + + for ex in params["examples"].splitlines(): + ex = re.split(r'(? 
<!\W[A-Za-z]):(?!\\)', ex)  # workaround for windows: don't split on i.e. C:\
+        img = io.imread(ex[0])[:, :, :3]
+        eximg = compute_features(img, params)
+        eximg = eximg.reshape(-1, eximg.shape[2])
+        mask = io.imread(ex[1], as_gray=True).reshape(-1, 1)
+        mask = adapter.sync(mask)
+        if nsamples_per_example != -1:  # sub sample examples
+            nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]
+                                                                               * nsamples_per_example)
+            idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
+            eximg = eximg[idxkeep, :]
+            mask = mask[idxkeep]
+
+        model_vals.append(eximg)
+        # again: if any component of vstack's input is a cupy.ndarray, the result will be a cupy.ndarray,
+        # but we explicitly sync the device of mask and model_labels anyway
+        model_labels = np.vstack((model_labels, mask))
+
+    model_vals = np.vstack(model_vals)
+    clf = RandomForestClassifier(n_jobs=-1)
+    # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}")
+    model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels,
+                                                            device=Device.build(Device.DEVICE_CPU), copy=True)
+    adapter(clf.fit)(model_vals, y=model_labels.ravel())
+    # clf.fit(model_vals, y=model_labels.ravel())
+    logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
+    return clf
+
+
+def _cache_clf(s: BaseImage, params, var_name):  # f"model_{name}"
+    # name = params.get("name", "classTask")
+    # model_var = Variable(var_name)
+
+    with Lock(var_name):  # this lock is shared across all threads such that only one thread needs to train the model
+        # then it is shared with all other modules
+        if not params["shared_dict"].get(var_name, False):
+            clf = new_clf_model(s, params)
+            logging.info(f"{s['filename']} - saving to cache...")
+            params["shared_dict"][var_name] = clf
+            logging.info(f"{s['filename']} - cached...")
+
+    logging.info(f"{s['filename']} - Lock Released...")
+    # try:
+    #     clf = model_var.get()
+    # except ValueError:
+    #     clf = new_clf_model(s, params)
+    #     model_var.set(clf)
+
+
+def get_clf(s: BaseImage, params):
+    name = params.get("name", "classTask")
+    var_name = f"model_{name}"
+    _cache_clf(s, params, var_name)
+    # clf = Variable(var_name).get()  # params["shared_dict"]["model_" + name]
+    clf = params["shared_dict"]["model_" + name]
+    return clf
+
+
+def byExampleWithFeatures(s: BaseImage, params):
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
@@ -158,79 +279,50 @@ def byExampleWithFeatures(s, params):
     if examples == "":
         logging.error(f"{s['filename']} - No examples provided in ClassificationModule.byExample for {name} !!")
         sys.exit(1)
-        return

     if params.get("features", "") == "":
         logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!")
         sys.exit(1)
-        return

-    with params["lock"]:  # this lock is shared across all threads such that only one thread needs to train the model
-        # then it is shared with all other modules
-        if not params["shared_dict"].get("model_" + name, False):
-            logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}")
-
-            model_vals = []
-            model_labels = np.empty([0, 1])
-
-            for ex in params["examples"].splitlines():
-                ex = re.split(r'(?<!\W[A-Za-z]):(?!\\)', ex)  # workaround for windows: don't split on i.e. C:\
-                img = io.imread(ex[0])[:, :, :3]
-                eximg = compute_features(img, params)
-                eximg = eximg.reshape(-1, eximg.shape[2])
-
-                mask = io.imread(ex[1], as_gray=True).reshape(-1, 1)
-
-                if nsamples_per_example != -1:  # sub sample examples
-                    nitems = nsamples_per_example if nsamples_per_example > 
1 else int(mask.shape[0]*nsamples_per_example) - idxkeep = np.random.choice(mask.shape[0], size=int(nitems)) - eximg = eximg[idxkeep, :] - mask = mask[idxkeep] - - - model_vals.append(eximg) - model_labels = np.vstack((model_labels, mask)) - - # do stuff here with model_vals - model_vals = np.vstack(model_vals) - clf = RandomForestClassifier(n_jobs=-1) - clf.fit(model_vals, model_labels.ravel()) - params["shared_dict"]["model_" + name] = clf - logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done") + adapter = s.image_handle.adapter + params['adapter'] = adapter + params['filename'] = s['filename'] - clf = params["shared_dict"]["model_" + name] + clf = get_clf(s, params) + logging.info(f"{__name__} - {s['filename']} Infer with the trained model.") img = s.getImgThumb(s["image_work_size"]) feats = compute_features(img, params) - cal = clf.predict_proba(feats.reshape(-1, feats.shape[2])) + logging.debug(f"{__name__} - {s['filename']} - NaN check img: {np.isnan(img).any()}") + logging.debug(f"{__name__} - {s['filename']} - NaN check feats: {np.isnan(feats).any()}") + + feats = feats.reshape(-1, feats.shape[2]) + feats = adapter.curate_arrays_device(feats, device=Device.build(Device.DEVICE_CPU), copy=True) + cal = adapter(clf.predict_proba)(feats) cal = cal.reshape(img.shape[0], img.shape[1], 2) mask = cal[:, :, 1] > thresh - area_thresh = int(params.get("area_threshold", "5")) if area_thresh > 0: - mask = remove_small_objects(mask, min_size=area_thresh, in_place=True) + # inplace=True is redundant and deprecated. + mask = adapter(remove_small_objects)(mask, min_size=area_thresh, out=mask) dilate_kernel_size = int(params.get("dilate_kernel_size", "0")) if dilate_kernel_size > 0: - mask = dilation(mask, selem=np.ones((dilate_kernel_size, dilate_kernel_size))) + mask = adapter(dilation)(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size))) mask = s["img_mask_use"] & (mask > 0) + # mask_ubyte = adapter.move_to_device(adapter(img_as_ubyte)(mask), device=ArrayDevice.CPU) + # io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", mask_ubyte) + + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) s["img_mask_" + name] = (mask * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + + # todo: now the BaseImage will explicitly set/get img_* keys (except bbox) to the corresponding img handle device + # todo: however I think it could be better to also explicitly let the adapter to handle all binary operations. 
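The explicit `and_` in the next line exists because the two masks may live on different devices (NumPy on CPU vs. CuPy on GPU), where a bare `&` is not guaranteed to work. A minimal CPU-only stand-in under that assumption — illustrative only, not the actual ArrayAdapter implementation:

    import numpy as np

    def and_sketch(mask_a: np.ndarray, mask_b: np.ndarray) -> np.ndarray:
        # with both masks already in host memory the helper reduces to an
        # element-wise AND; the real adapter first curates both operands
        # onto a common device before combining them
        return mask_a & mask_b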
+ s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name]) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -243,5 +335,4 @@ def byExampleWithFeatures(s, params): s["img_mask_force"].append("img_mask_" + name) s["completed"].append(f"byExampleWithFeatures:{name}") - return diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py index dbb41f1..91a943e 100644 --- a/histoqc/DeconvolutionModule.py +++ b/histoqc/DeconvolutionModule.py @@ -2,59 +2,54 @@ import os import sys import numpy as np -from skimage import io, color, img_as_ubyte -from skimage.exposure import rescale_intensity +from skimage.util import img_as_ubyte +from histoqc.BaseImage import BaseImage from skimage.color import separate_stains -from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb -from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \ - hpx_from_rgb # need to load all of these in case the user selects them from distutils.util import strtobool +from histoqc.import_wrapper import dynamic_import -import matplotlib.pyplot as plt - -def separateStains(s, params): +def separateStains(s: BaseImage, params): logging.info(f"{s['filename']} - \tseparateStains") + adapter = s.image_handle.adapter stain = params.get("stain", "") use_mask = strtobool(params.get("use_mask", "True")) if stain == "": logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains") sys.exit(1) - return - stain_matrix = getattr(sys.modules[__name__], stain, None) - - if stain_matrix is None: + try: + stain_matrix = dynamic_import("skimage.color", stain, return_first=True) + except ImportError: logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") sys.exit(1) - return - mask = s["img_mask_use"] + adapter.sync(stain_matrix) + mask = adapter.sync(s["img_mask_use"]) - if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early + if use_mask and len(mask.nonzero()[0]) == 0: # -- lets just error check at the top if mask is empty and abort early for c in range(3): s.addToPrintList(f"deconv_c{c}_std", str(-100)) s.addToPrintList(f"deconv_c{c}_mean", str(-100)) - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) + fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png") + adapter.imsave(fname, img_as_ubyte(np.zeros(mask.shape))) logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " - f"remains detectable! Saving Black images") + f"remains detectable! Saving Black images") s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue " f"remains detectable! 
Saving Black images") return img = s.getImgThumb(s["image_work_size"]) - dimg = separate_stains(img, stain_matrix) + dimg = adapter(separate_stains)(img, conv_matrix=stain_matrix) for c in range(0, 3): dc = dimg[:, :, c] - clip_max_val = np.quantile(dc.flatten(), .99) dc = np.clip(dc, a_min=0, a_max=clip_max_val) - if use_mask: dc_sub = dc[mask] dc_min = dc_sub.min() @@ -71,6 +66,6 @@ def separateStains(s, params): s.addToPrintList(f"deconv_c{c}_std", str(dc.std())) dc = (dc - dc_min) / float(dc_max - dc_min) * mask - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc)) - + fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(dc)) return diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py index e58707c..0dd8a21 100644 --- a/histoqc/HistogramModule.py +++ b/histoqc/HistogramModule.py @@ -4,18 +4,29 @@ from skimage import io import matplotlib.pyplot as plt from distutils.util import strtobool +from histoqc.BaseImage import BaseImage +from typing import Union +from histoqc.array_adapter import ArrayAdapter, Device +from histoqc.array_adapter.typing import TYPE_ARRAY +# todo: beware that because there is no lock, it is likely that each worker will compute the template of their own. +# this holds a local copy of the histograms of the template images so that they need only be computed once +global_holder = {} -global_holder = {} #this holds a local copy of the histograms of the template images so that they need only be computed once - -def getHistogram(s, params): +def getHistogram(s: BaseImage, params): logging.info(f"{s['filename']} - \tgetHistogram") + # adapter = s.image_handle.adapter limit_to_mask = strtobool(params.get("limit_to_mask", True)) bins = int(params.get("bins", 20)) img = s.getImgThumb(s["image_work_size"]) + tissue_mask = s["img_mask_use"] + # matplotlib --> pointless to use GPU here even if a corresponding API exists + img, tissue_mask = ArrayAdapter.curate_arrays_device(img, tissue_mask, + device=Device.build(Device.DEVICE_CPU)) + # tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU) if limit_to_mask: - img = img[s["img_mask_use"]] + img = img[tissue_mask] else: img = img.reshape(-1, 3) @@ -26,53 +37,61 @@ def getHistogram(s, params): ax.set_title('Color Distribution for ' + s["filename"]) ax.set_xlabel('Pixel Val') ax.set_ylabel('Density') - plt.savefig(s["outdir"] + os.sep + s["filename"] + "_hist.png") + fname = os.path.join(s["outdir"], f"{s['filename']}_hist.png") + plt.savefig(fname) plt.close() return -def computeHistogram(img, bins, mask=-1): +def computeHistogram(img: TYPE_ARRAY, bins: int, + adapter: ArrayAdapter, mask: Union[TYPE_ARRAY, int] = -1) -> TYPE_ARRAY: result = np.zeros(shape=(bins, 3)) + img, mask = adapter.device_sync_all(img, mask) + result = adapter.sync(result) for chan in range(0, 3): vals = img[:, :, chan].flatten() - if (isinstance(mask, np.ndarray)): - vals = vals[mask.flatten()] + if ArrayAdapter.is_array(mask): - result[:, chan] = np.histogram(vals, bins=bins, density=True, range=[0, 255])[0] + vals = vals[mask.flatten()] + result[:, chan] = np.histogram(vals, bins=bins, density=True, range=(0, 255))[0] return result -def compareToTemplates(s, params): +def compareToTemplates(s: BaseImage, params): logging.info(f"{s['filename']} - \tcompareToTemplates") + adapter = s.image_handle.adapter bins = int(params.get("bins", 20)) limit_to_mask = strtobool(params.get("limit_to_mask", True)) - if (not 
global_holder.get("templates", False)): #if the histograms haven't already been computed, compute and store them now + # if the histograms haven't already been computed, compute and store them now + if not global_holder.get("templates", False): templates = {} for template in params["templates"].splitlines(): - templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins) + templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), + bins, adapter) # compute each of their histograms global_holder["templates"] = templates img = s.getImgThumb(s["image_work_size"]) - if (limit_to_mask): + if limit_to_mask: mask = s["img_mask_use"] if len(mask.nonzero()[0]) == 0: logging.warning(f"{s['filename']} - HistogramModule.compareToTemplates NO tissue " f"remains detectable in mask!") s["warnings"].append(f"HistogramModule.compareToTemplates NO tissue " - f"remains detectable in mask!") + f"remains detectable in mask!") - imghst = np.zeros((bins,3)) + imghst = np.zeros((bins, 3)) else: - imghst = computeHistogram(img, bins, mask) + imghst = computeHistogram(img, bins, adapter, mask) else: - imghst = computeHistogram(img, bins) + imghst = computeHistogram(img, bins, adapter) for template in global_holder["templates"]: - val = np.sum(pow(abs(global_holder["templates"][template] - imghst), 2)) + hist_diff = adapter.sub(global_holder["templates"][template], imghst) + val = (abs(hist_diff) ** 2).sum() s.addToPrintList(template + "_MSE_hist", str(val)) return diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py index 1f8d306..fc9f5c4 100644 --- a/histoqc/LightDarkModule.py +++ b/histoqc/LightDarkModule.py @@ -1,40 +1,40 @@ import logging import os import numpy as np -from histoqc.BaseImage import printMaskHelper -from skimage import io, color, img_as_ubyte +from histoqc.BaseImage import printMaskHelper, BaseImage +from skimage import io, color +from histoqc.array_adapter import ArrayAdapter, Device +from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage.filters import threshold_otsu, rank from skimage.morphology import disk from sklearn.cluster import KMeans from skimage import exposure -import matplotlib.pyplot as plt +def getIntensityThresholdOtsu(s: BaseImage, params): - -def getIntensityThresholdOtsu(s, params): logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu") + adapter = s.image_handle.adapter name = params.get("name", "classTask") local = strtobool(params.get("local", "False")) - radius = float(params.get("radius", 15)) - selem = disk(radius) + radius = int(params.get("radius", 15)) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) if local: - thresh = rank.otsu(img, selem) + thresh = adapter(rank.otsu)(img, footprint=disk(radius)) else: - thresh = threshold_otsu(img) + thresh = adapter(threshold_otsu)(img) - map = img < thresh + region_below_thresh = img < thresh - s["img_mask_" + name] = map > 0 + s["img_mask_" + name] = region_below_thresh > 0 if strtobool(params.get("invert", "False")): s["img_mask_" + name] = ~s["img_mask_" + name] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(s["img_mask_" + name])) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_" + name])) prev_mask = s["img_mask_use"] s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] @@ -51,10 +51,11 
@@ def getIntensityThresholdOtsu(s, params): return -def getIntensityThresholdPercent(s, params): +def getIntensityThresholdPercent(s: BaseImage, params): + + adapter = s.image_handle.adapter name = params.get("name", "classTask") logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}") - lower_thresh = float(params.get("lower_threshold", "-inf")) upper_thresh = float(params.get("upper_threshold", "inf")) @@ -67,24 +68,23 @@ def getIntensityThresholdPercent(s, params): img = s.getImgThumb(s["image_work_size"]) img_std = img.std(axis=2) - map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std) - - img = color.rgb2gray(img) - map = np.bitwise_and(img > lower_thresh, img < upper_thresh) - - map = np.bitwise_and(map, map_std) - - s["img_mask_" + name] = map > 0 + map_std = adapter.and_(img_std > lower_std, img_std < upper_std) + img = adapter(color.rgb2gray)(img) + region_between_interval = adapter.and_(img > lower_thresh, img < upper_thresh) + region_between_interval = adapter.and_(region_between_interval, map_std) + s["img_mask_" + name] = region_between_interval > 0 if strtobool(params.get("invert", "False")): s["img_mask_" + name] = ~s["img_mask_" + name] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(prev_mask & ~s["img_mask_" + name])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_" + name]) + fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png") + mask_out = adapter.and_(prev_mask, ~s["img_mask_" + name]) + mask_out = adapter(img_as_ubyte)(mask_out) + adapter.imsave(fname, mask_out) s.addToPrintList(name, printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -98,11 +98,8 @@ def getIntensityThresholdPercent(s, params): return - - - - -def removeBrightestPixels(s, params): +def removeBrightestPixels(s: BaseImage, params): + adapter = s.image_handle.adapter logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels") # lower_thresh = float(params.get("lower_threshold", -float("inf"))) @@ -112,23 +109,26 @@ def removeBrightestPixels(s, params): # upper_var = float(params.get("upper_variance", float("inf"))) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) - kmeans = KMeans(n_clusters=3, n_init=1).fit(img.reshape([-1, 1])) + kmc = KMeans(n_clusters=3, n_init=1) + kmeans = adapter(kmc.fit)(img.reshape([-1, 1])) + # noinspection PyUnresolvedReferences brightest_cluster = np.argmax(kmeans.cluster_centers_) + # noinspection PyUnresolvedReferences darkest_point_in_brightest_cluster = (img.reshape([-1, 1])[kmeans.labels_ == brightest_cluster]).min() s["img_mask_bright"] = img > darkest_point_in_brightest_cluster - - if strtobool(params.get("invert", "False")): s["img_mask_bright"] = ~s["img_mask_bright"] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"]) + fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png.png") + bright_out = adapter.and_(prev_mask, ~s["img_mask_bright"]) + bright_out = adapter(img_as_ubyte)(bright_out) + adapter.imsave(fname, bright_out) s.addToPrintList("brightestPixels", 
printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -142,28 +142,28 @@ def removeBrightestPixels(s, params): return - -def minimumPixelIntensityNeighborhoodFiltering(s,params): +def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params): logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering") + adapter = s.image_handle.adapter disk_size = int(params.get("disk_size", 10000)) threshold = int(params.get("upper_threshold", 200)) img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - img = (img * 255).astype(np.uint8) - selem = disk(disk_size) + img = adapter(color.rgb2gray)(img) + img = adapter(img_as_ubyte)(img) + # note - for uint type, CPU's rank.minimum is >>> faster than GPU's erosion + imgfilt = adapter(rank.minimum)(img, footprint=disk(disk_size)) - imgfilt = rank.minimum(img, selem) s["img_mask_bright"] = imgfilt > threshold - if strtobool(params.get("invert", "True")): s["img_mask_bright"] = ~s["img_mask_bright"] prev_mask = s["img_mask_use"] - s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"]) + fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png") + mask_out = adapter.and_(prev_mask, ~s["img_mask_bright"]) + adapter.imsave(fname, adapter(img_as_ubyte)(mask_out)) s.addToPrintList("brightestPixels", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -176,13 +176,16 @@ def minimumPixelIntensityNeighborhoodFiltering(s,params): return -def saveEqualisedImage(s,params): - logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage") +def saveEqualisedImage(s: BaseImage, params): + logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage") + adapter = s.image_handle.adapter img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - - out = exposure.equalize_hist((img*255).astype(np.uint8)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", img_as_ubyte(out)) + img = adapter(color.rgb2gray)(img) + img_u8 = adapter(img_as_ubyte)(img) + out = adapter(exposure.equalize_hist)(img_u8) + out_u8 = ArrayAdapter.curate_arrays_device(adapter(img_as_ubyte)(out), + device=Device.build(Device.DEVICE_CPU)) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", out_u8) return diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py index 6d3733f..d7babd3 100644 --- a/histoqc/LocalTextureEstimationModule.py +++ b/histoqc/LocalTextureEstimationModule.py @@ -1,50 +1,53 @@ import logging import numpy as np -from skimage import color +from skimage import color from distutils.util import strtobool from skimage.feature import graycomatrix, graycoprops -import matplotlib.pyplot as plt +from histoqc.array_adapter import ArrayAdapter, Device +from histoqc.BaseImage import BaseImage - -def estimateGreyComatrixFeatures(s, params): +def estimateGreyComatrixFeatures(s: BaseImage, params): prefix = params.get("prefix", None) prefix = prefix+"_" if prefix else "" - + adapter = s.image_handle.adapter logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}") patch_size = int(params.get("patch_size", 32)) npatches = int(params.get("npatches", 100)) nlevels = int(params.get("nlevels", 8)) - feats 
= params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') + feats = params.get("feats", "contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') invert = strtobool(params.get("invert", "False")) - mask_name = params.get("mask_name","img_mask_use") - + mask_name = params.get("mask_name", "img_mask_use") img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) + img = adapter(color.rgb2gray)(img) mask = s[mask_name] if not invert else ~s[mask_name] if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask - msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask" + msg = (f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}" + f" Can not estimate the empty mask since NO tissue remains detectable in mask") logging.warning(f"{s['filename']} - {msg}") s["warnings"].append(msg) return maskidx = mask.nonzero() - maskidx = np.asarray(maskidx).transpose() + maskidx = ArrayAdapter.new_array(maskidx, array_device=Device.build(Device.DEVICE_CPU)).transpose() idx = np.random.choice(maskidx.shape[0], npatches) results = [] + for index in idx: + r, c = maskidx[index, :] - for id in idx: - r, c = maskidx[id, :] patch = img[r:r + patch_size, c:c + patch_size] - glcm = graycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5], - angles=[0], levels=nlevels, symmetric=True, normed=True) - results.append([graycoprops(glcm, prop=feat) for feat in feats]) + image = adapter(np.digitize)(patch, bins=np.linspace(0, 1, num=nlevels), right=True) + glcm = adapter(graycomatrix)(image, distances=[5], + angles=[0], levels=nlevels, symmetric=True, normed=True) + haralick_feats = [adapter(graycoprops)(glcm, prop=feat) for feat in feats] + haralick_feats = adapter.device_sync_all(*haralick_feats) + results.append(haralick_feats) - results = np.asarray(results).squeeze() + results = adapter.asarray(results).squeeze() for vals, feat in zip(results.transpose(), feats): s.addToPrintList(f"{prefix}{feat}", str(vals.mean())) diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py index 68e8437..426a358 100644 --- a/histoqc/MorphologyModule.py +++ b/histoqc/MorphologyModule.py @@ -1,31 +1,30 @@ import logging import os import numpy as np -from histoqc.BaseImage import printMaskHelper -from skimage import io, morphology, img_as_ubyte, measure +from histoqc.BaseImage import printMaskHelper, BaseImage +from histoqc.array_adapter import ArrayAdapter +from histoqc.array_adapter.typing import TYPE_ARRAY +from skimage import morphology, measure +from skimage.util import img_as_ubyte -from scipy import ndimage as ndi -import matplotlib.pyplot as plt # these 2 are used for debugging -from histoqc.SaveModule import blend2Images #for easier debugging - - -def removeSmallObjects(s, params): +def removeSmallObjects(s: BaseImage, params): logging.info(f"{s['filename']} - \tremoveSmallObjects") + adapter = s.image_handle.adapter min_size = int(params.get("min_size", 64)) - img_reduced = morphology.remove_small_objects(s["img_mask_use"], min_size=min_size) - img_small = np.invert(img_reduced) & s["img_mask_use"] + img_reduced = adapter(morphology.remove_small_objects)(s["img_mask_use"], min_size=min_size) + img_small = adapter.and_(~img_reduced, s["img_mask_use"]) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_remove.png", img_as_ubyte(img_small)) + fname = os.path.join(s["outdir"], 
f"{s['filename']}_small_remove.png") + adapter.imsave(fname, adapter(img_as_ubyte)(img_small)) s["img_mask_small_filled"] = (img_small * 255) > 0 prev_mask = s["img_mask_use"] s["img_mask_use"] = img_reduced - - - rps = measure.regionprops(morphology.label(img_small)) + label_small = adapter(morphology.label)(img_small) + rps = adapter(measure.regionprops)(label_small) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() @@ -36,30 +35,25 @@ def removeSmallObjects(s, params): s.addToPrintList("small_tissue_removed_mean_area", str(area_mean)) s.addToPrintList("small_tissue_removed_max_area", str(area_max)) - - - - s.addToPrintList("small_tissue_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) - if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty logging.warning(f"{s['filename']} - After MorphologyModule.removeSmallObjects: NO tissue " f"remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append(f"After MorphologyModule.removeSmallObjects: NO tissue remains " f"detectable! Downstream modules likely to be incorrect/fail") - return -def remove_large_objects(img, max_size): +def remove_large_objects(img: TYPE_ARRAY, max_size: int, adapter: ArrayAdapter): # code taken from morphology.remove_small_holes, except switched < with > - selem = ndi.generate_binary_structure(img.ndim, 1) - ccs = np.zeros_like(img, dtype=np.int32) - ndi.label(img, selem, output=ccs) - component_sizes = np.bincount(ccs.ravel()) - too_big = component_sizes > max_size + # selem = adapter(ndi.generate_binary_structure)(img.ndim, connectivity=1) + # equivalent to ndi.label in binary case. 
+ img = adapter.sync(img) + ccs = adapter(measure.label)(img, connectivity=1) + component_sizes: TYPE_ARRAY = adapter.sync(np.bincount(ccs.ravel())) + too_big: TYPE_ARRAY = component_sizes > max_size too_big_mask = too_big[ccs] img_out = img.copy() img_out[too_big_mask] = 0 @@ -68,40 +62,41 @@ def remove_large_objects(img, max_size): def removeFatlikeTissue(s, params): logging.info(f"{s['filename']} - \tremoveFatlikeTissue") + adapter = s.image_handle.adapter fat_cell_size = int(params.get("fat_cell_size", 64)) kernel_size = int(params.get("kernel_size", 3)) max_keep_size = int(params.get("max_keep_size", 1000)) - img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=fat_cell_size) - img_small = img_reduced & np.invert(s["img_mask_use"]) - img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9) + img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=fat_cell_size) + img_small = adapter.and_(img_reduced, ~s["img_mask_use"]) + img_small = ~adapter(morphology.remove_small_holes)(~img_small, area_threshold=9) + # binary + mask_dilate = adapter(morphology.dilation)(img_small, footprint=np.ones((kernel_size, kernel_size))) - mask_dilate = morphology.dilation(img_small, selem=np.ones((kernel_size, kernel_size))) - mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size) + mask_dilate_removed = remove_large_objects(mask_dilate, max_size=max_keep_size, adapter=adapter) - mask_fat = mask_dilate & ~mask_dilate_removed + mask_fat = adapter.and_(mask_dilate, ~mask_dilate_removed) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_fatlike.png", img_as_ubyte(mask_fat)) + fname = os.path.join(s["outdir"], f"{s['filename']}_fatlike.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask_fat)) s["img_mask_fatlike"] = (mask_fat * 255) > 0 prev_mask = s["img_mask_use"] - s["img_mask_use"] = prev_mask & ~mask_fat + s["img_mask_use"] = adapter.and_(prev_mask, ~mask_fat) - rps = measure.regionprops(morphology.label(mask_fat)) + label_fat = adapter(morphology.label)(mask_fat) + rps = adapter(measure.regionprops)(label_fat) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() else: nobj = area_max = area_mean = 0 - s.addToPrintList("fatlike_tissue_removed_num_regions", str(nobj)) s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean)) s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max)) - - s.addToPrintList("fatlike_tissue_removed_percent", printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) @@ -110,24 +105,27 @@ def removeFatlikeTissue(s, params): f"remains detectable! Downstream modules likely to be incorrect/fail") s["warnings"].append(f"After MorphologyModule.removeFatlikeTissue: NO tissue remains " f"detectable! 
Downstream modules likely to be incorrect/fail") + return def fillSmallHoles(s, params): logging.info(f"{s['filename']} - \tfillSmallHoles") + adapter = s.image_handle.adapter min_size = int(params.get("min_size", 64)) - img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size) - img_small = img_reduced & np.invert(s["img_mask_use"]) - + img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=min_size) + img_small = adapter.and_(img_reduced, np.invert(s["img_mask_use"])) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small)) + fname = os.path.join(s["outdir"], f"{s['filename']}_small_fill.png") + adapter.imsave(fname, adapter(img_as_ubyte)(img_small)) s["img_mask_small_removed"] = (img_small * 255) > 0 prev_mask = s["img_mask_use"] s["img_mask_use"] = img_reduced - rps = measure.regionprops(morphology.label(img_small)) + label_small = adapter(morphology.label)(img_small) + rps = adapter(measure.regionprops)(label_small) if rps: - areas = np.asarray([rp.area for rp in rps]) + areas = np.asarray([float(rp.area) for rp in rps]) nobj = len(rps) area_max = areas.max() area_mean = areas.mean() diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py index 627f8ca..b1451f4 100644 --- a/histoqc/SaveModule.py +++ b/histoqc/SaveModule.py @@ -1,76 +1,88 @@ import logging import os -from skimage import io, img_as_ubyte +from skimage.util import img_as_ubyte from distutils.util import strtobool from skimage import color import numpy as np +from histoqc.BaseImage import BaseImage +from histoqc.array_adapter.typing import TYPE_ARRAY +from histoqc.array_adapter import ArrayAdapter -import matplotlib.pyplot as plt - -def blend2Images(img, mask): - if (img.ndim == 3): - img = color.rgb2gray(img) - if (mask.ndim == 3): - mask = color.rgb2gray(mask) +def blend2Images(img: TYPE_ARRAY, mask: TYPE_ARRAY, adapter: ArrayAdapter): + if img.ndim == 3: + img = adapter(color.rgb2gray)(img) + if mask.ndim == 3: + mask = adapter(color.rgb2gray)(mask) img = img[:, :, None] * 1.0 # can't use boolean mask = mask[:, :, None] * 1.0 + # explicitly sync again to satisfy the requirement of using np.concatenate as a unified concatenate func + img, mask = adapter.device_sync_all(img, mask) out = np.concatenate((mask, img, mask), 2) return out -def saveFinalMask(s, params): +def saveFinalMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveUsableRegion") - + adapter = s.image_handle.adapter mask = s["img_mask_use"] for mask_force in s["img_mask_force"]: + mask, s[mask_force] = adapter.device_sync_all(mask, s[mask_force]) mask[s[mask_force]] = 0 - io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask)) + fname = os.path.join(s["outdir"], f"{s['filename']}_mask_use.png") + adapter.imsave(fname, adapter(img_as_ubyte)(mask)) if strtobool(params.get("use_mask", "True")): # should we create and save the fusion mask? 
img = s.getImgThumb(s["image_work_size"]) - out = blend2Images(img, mask) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out)) - + out = blend2Images(img, mask, adapter) + fname = os.path.join(s["outdir"], f"{s['filename']}_fuse.png") + adapter.imsave(fname, adapter(img_as_ubyte)(out)) return -def saveAssociatedImage(s, key:str, dim:int): +def saveAssociatedImage(s: BaseImage, key: str, dim: int): logging.info(f"{s['filename']} - \tsave{key.capitalize()}") - osh = s["os_handle"] + image_handle = s.image_handle - if not key in osh.associated_images: + if key not in image_handle.associated_images: message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images" logging.warning(message) s["warnings"].append(message) return - + # get asscociated image by key - associated_img = osh.associated_images[key] - (width, height) = associated_img.size - - # calulate the width or height depends on dim - if width > height: - h = round(dim * height / width) - size = (dim, h) - else: - w = round(dim * width / height) - size = (w, dim) - - associated_img = associated_img.resize(size) - associated_img = np.asarray(associated_img)[:, :, 0:3] - io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img) + associated_img = image_handle.associated_images[key] + width, height = image_handle.__class__.backend_dim(associated_img) + + if width * height == 0: + message = f"{s['filename']}- Irregular Size {width, height} of the Associated Images: {key}" + logging.warning(message) + s["warnings"].append(message) + return + + aspect_ratio = width / height + size = image_handle.__class__.curate_to_max_dim(width, height, max_size=dim, aspect_ratio=aspect_ratio) + # to pillow handle + associated_img = image_handle.__class__.backend_to_pil(associated_img) + # resize the pil (RGB) + associated_img = associated_img.resize(size).convert("RGB") + # save the pil + fname = os.path.join(s["outdir"], f"{s['filename']}_{key}.png") + associated_img.save(fname) + return + def saveMacro(s, params): dim = params.get("small_dim", 500) saveAssociatedImage(s, "macro", dim) return - -def saveMask(s, params): + + +def saveMask(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveMaskUse") suffix = params.get("suffix", None) - + adapter = s.image_handle.adapter # check suffix param if not suffix: msg = f"{s['filename']} - \tPlease set the suffix for mask use." 
@@ -78,14 +90,21 @@ def saveMask(s, params): return # save mask - io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"])) + fname = os.path.join(s['outdir'], f"{s['filename']}_{suffix}.png") + adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_use"])) + + return + -def saveThumbnails(s, params): +def saveThumbnails(s: BaseImage, params): logging.info(f"{s['filename']} - \tsaveThumbnail") # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one img = s.getImgThumb(params.get("image_work_size", "1.25x")) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img) + adapter = s.image_handle.adapter + fname_thumb = os.path.join(s["outdir"], f"{s['filename']}_thumb.png") + adapter.imsave(fname_thumb, adapter(img_as_ubyte)(img)) img = s.getImgThumb(params.get("small_dim", 500)) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img) + fname_small = os.path.join(s["outdir"], f"{s['filename']}_thumb_small.png") + adapter.imsave(fname_small, adapter(img_as_ubyte)(img)) return diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py index 0101ca4..e3cf69e 100644 --- a/histoqc/TileExtractionModule.py +++ b/histoqc/TileExtractionModule.py @@ -4,9 +4,9 @@ are open. """ import os -import openslide import json from histoqc.BaseImage import BaseImage +from histoqc.array_adapter import Device from typing import Callable, Dict, Any, List, Tuple, Union import numpy as np from PIL import Image, ImageDraw @@ -14,11 +14,9 @@ from contextlib import contextmanager from distutils.util import strtobool import logging -from histoqc.import_wrapper.typing import Literal, get_args -# from histoqc.import_wrapper.helper import dynamic_import -# __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] -# Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") -# get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions") +from typing_extensions import Literal, get_args +from PIL.Image import Image as PILImage + TYPE_TILE_SIZE = Literal['tile_size'] TYPE_TILE_STRIDE = Literal['tile_stride'] @@ -38,6 +36,7 @@ TYPE_BBOX_INT = Tuple[int, int, int, int] +# noinspection PyUnusedLocal def default_screen_identity(img: np.ndarray): return True @@ -269,10 +268,10 @@ def max_tile_bbox_top_left_coord(rp_bbox: TYPE_BBOX_INT, work_tile_size: float, tile_max_left = left_rp + max_step_horiz * work_stride tile_max_top = top_rp + max_step_vert * work_stride - assert round(tile_max_left + work_tile_size) <= right_rp,\ + assert round(tile_max_left + work_tile_size) <= right_rp, \ f"left + size check" \ f" {tile_max_left + work_tile_size} = {tile_max_left} + {work_tile_size} <= {right_rp} fail" - assert round(tile_max_top + work_tile_size) <= bottom_rp,\ + assert round(tile_max_top + work_tile_size) <= bottom_rp, \ f"top + size check" \ f" {tile_max_top + work_tile_size} = {tile_max_top} + {work_tile_size} <= {bottom_rp} fail" return int(tile_max_top), int(tile_max_left) @@ -345,8 +344,6 @@ def _tile_windows_helper(mask_use_for_tiles, """ mask = mask_use_for_tiles - # image_handle: openslide.OpenSlide = s["os_handle"] - # img_w, img_h = image_handle.dimensions assert mask is not None, f"{filename}: mask is not initialized" assert isinstance(mask, np.ndarray), f"The mask is expected to be a Numpy NDArray" @@ -393,7 +390,7 @@ def tile_windows(self, root_dict = self.__tile_window_cache entry = root_dict.get(key, None) if entry is None or force_rewrite: - # img_w, 
img_h = s['os_handle'].dimensions + # img_w, img_h = s.image_handle.dimensions root_dict[key] = TileExtractor._tile_windows_helper(mask_use_for_tiles, img_w, img_h, tile_size, tile_stride, tissue_thresh) @@ -532,7 +529,7 @@ def valid_tile_extraction(self, tw: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h, tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite) window_list_of_regions = tw.windows_on_original_image - image_handle: openslide.OpenSlide = s["os_handle"] + image_handle = s.image_handle valid_window_list_all_regions: List[List[Tuple[int, int, int, int]]] = [] for region_windows in window_list_of_regions: region_windows: List[Tuple[int, int, int, int]] @@ -541,7 +538,7 @@ def valid_tile_extraction(self, window: Tuple[int, int, int, int] # just to make the convention clear location, size = TileExtractor.__window_convert(window) - region = image_handle.read_region(location, 0, size) + region: PILImage = image_handle.read_region(location, 0, size) tile_np = np.array(region, copy=False) valid_flag = screen_callbacks(tile_np) if not valid_flag: @@ -556,7 +553,10 @@ def valid_tile_extraction(self, def extract(s: BaseImage, params: Dict[PARAMS, Any]): + logging.info(f"{s['filename']} - \textract") + + adapter = s.image_handle.adapter with params['lock']: slide_out = s['outdir'] tile_output_dir = params.get('tile_output', os.path.join(slide_out, 'tiles')) @@ -570,10 +570,11 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]): tile_size = int(params.get('tile_size', 256)) tile_stride = int(params.get('tile_stride', 256)) tissue_thresh = float(params.get('tissue_ratio', 0.5)) - - img_use_for_tiles = s.getImgThumb(s["image_work_size"]) - mask_use_for_tiles = s['img_mask_use'] - image_handle: openslide.OpenSlide = s['os_handle'] + # no added value from GPU acceleration (except for read_region) as the procedure is sequential + img_use_for_tiles, mask_use_for_tiles = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]), + s['img_mask_use'], + device=Device.build(Device.DEVICE_CPU)) + image_handle = s.image_handle img_w, img_h = image_handle.dimensions tile_extractor = TileExtractor(s) diff --git a/histoqc/__main__.py b/histoqc/__main__.py index c0bd0c0..a537948 100644 --- a/histoqc/__main__.py +++ b/histoqc/__main__.py @@ -7,21 +7,87 @@ import os import sys import time -from functools import partial +# from functools import partial +from typing import Tuple, Optional, List +import dask.distributed +from logging.config import dictConfig from histoqc._pipeline import BatchedResultFile -from histoqc._pipeline import MultiProcessingLogManager +# from histoqc._pipeline import MultiProcessingLogManager +from copy import deepcopy +from histoqc._worker_setup import WorkerSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, \ + HDL_OUT_FIELD, setup_plotting_backend, LoggerConfigBuilder, FMT_DFT, HDL_CONSOLE from histoqc._pipeline import load_pipeline from histoqc._pipeline import log_pipeline -from histoqc._pipeline import move_logging_file_handler -from histoqc._pipeline import setup_logging -from histoqc._pipeline import setup_plotting_backend -from histoqc._worker import worker -from histoqc._worker import worker_setup -from histoqc._worker import worker_success -from histoqc._worker import worker_error +# from histoqc._pipeline import move_logging_file_handler +# from histoqc._pipeline import setup_logging +from histoqc._worker import (worker, worker_setup, worker_success, worker_error, worker_single_process, + PARAM_SHARE, 
KEY_ASSIGN) + from histoqc.config import read_config_template from histoqc.data import managed_pkg_data +from histoqc.wsi_handles.constants import KEY_CUCIM +from histoqc.import_wrapper.cupy_extra import cp, cupy_installed +from histoqc.import_wrapper.dask_cuda import dask_cuda_installed, dask_cuda +from dask.distributed import Client, as_completed + + +def parse_config(args: argparse.Namespace) -> Tuple[configparser.ConfigParser, Optional[str]]: + config = configparser.ConfigParser() + msg = None + if not args.config: + msg = f"Configuration file not set (--config), using default" + config.read_string(read_config_template('default')) + elif os.path.exists(args.config): + config.read(args.config) # Will read the config file + else: + msg = f"Configuration file {args.config} assuming to be a template...checking." + config.read_string(read_config_template(args.config)) + return config, msg + + +def _get_device_list(n_proc: int): + return list(range(n_proc)) if n_proc > 0 else [0] + + +def parse_multiprocessing(args: argparse.Namespace, + config: configparser.ConfigParser) -> Tuple[argparse.Namespace, bool]: # List[int], + is_multiproc = args.nprocesses >= 0 + is_cuda = KEY_CUCIM in config["BaseImage.BaseImage"].get("handles", "") + + # if use cuda but without installation of dependencies - return + if not is_cuda or not (cupy_installed() and dask_cuda_installed()): + return args, False # _get_device_list(args.nprocesses), + # guard + assert is_cuda and cp is not None and dask_cuda is not None, f"Enable CUDA but Dep is not installed" + # set spawn + if is_multiproc: + multiprocessing.set_start_method("spawn", force=True) + num_devices = cp.cuda.runtime.getDeviceCount() + # n_proc cannot exceed num of GPUs. + assert num_devices > 0, f"Fail to detect usable CUDA devices" + if args.nprocesses > num_devices: + logging.warning(f"{__name__}: CUDA enabled but number of processes is greater than number of devices:" + f"{args.nprocesses} > {num_devices}. Cutoff the number of processes to {num_devices}") + args.nprocesses = min(args.nprocesses, num_devices) + # device list --> if + # device_list = _get_device_list(args.nprocesses) + return args, is_cuda + + +def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]], + nvlink: bool, spill_limit, rmm_pool, num_threads: int): + assert nprocesses > 0, f"Expect number of processes > 0 to launch the cluster. 
Get {nprocesses}" + if not is_cuda: + return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, threads_per_worker=num_threads) + assert cp is not None and dask_cuda is not None + return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, + jit_unspill=True, + threads_per_worker=num_threads, + device_memory_limit=spill_limit, + n_workers=nprocesses, + enable_nvlink=nvlink, + rmm_pool_size=rmm_pool, ) # rmm_maximum_pool_size="10GB" @managed_pkg_data @@ -30,7 +96,9 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(prog="histoqc", description='Run HistoQC main quality control pipeline for digital pathology images') + parser = argparse.ArgumentParser(prog="histoqc", + description='Run HistoQC main quality control pipeline' + ' for digital pathology images') parser.add_argument('input_pattern', help="input filename pattern (try: *.svs or target_path/*.svs )," " or tsv file containing list of files to analyze", @@ -61,25 +129,46 @@ def main(argv=None): parser.add_argument('--symlink', metavar="TARGET_DIR", help="create symlink to outdir in TARGET_DIR", default=None) + parser.add_argument('--gpu_ids', + type=int, + nargs='+', + help="GPU Devices to use (None=use all). Default: None", + default=None) + parser.add_argument('--nvlink', action='store_true', + help='whether to enable NVLINK. Only applied when CUDA cluster is launched') + parser.add_argument('--rmm_pool', type=str, default="10GB", + help='Pool size. Only applied when CUDA cluster is launched') + parser.add_argument('--spill_limit', type=float, default=0.5, + help='Percentage threshold of GPU memory before spill to host memory') + parser.add_argument('--num_threads', type=int, default=4, + help='# of threads per worker.') args = parser.parse_args(argv) # --- multiprocessing and logging setup ----------------------------------- - setup_logging(capture_warnings=True, filter_warnings='ignore') - mpm = multiprocessing.Manager() - lm = MultiProcessingLogManager('histoqc', manager=mpm) - # --- parse the pipeline configuration ------------------------------------ + config, conf_warn_msg = parse_config(args) + args, is_cuda = parse_multiprocessing(args, config) - config = configparser.ConfigParser() - if not args.config: - lm.logger.warning(f"Configuration file not set (--config), using default") - config.read_string(read_config_template('default')) - elif os.path.exists(args.config): - config.read(args.config) #Will read the config file - else: - lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.") - config.read_string(read_config_template(args.config)) + # --- create output directory and move log -------------------------------- + args.outdir = os.path.expanduser(args.outdir) + os.makedirs(args.outdir, exist_ok=True) + # move_logging_file_handler(logging.getLogger(), args.outdir) + + logging_setup = WorkerSetup(deepcopy(MAIN_CONF_BUILD), + deepcopy(WORKER_CONF_BUILD), + capture_warnings=True, filter_warnings='ignore') + # setup main proc logger config and file handler. + logging_setup.setup_mainprocess_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN, + handler_name=HDL_FILE, out_field=HDL_OUT_FIELD) + + # Inherit from the root logger. 
+ + main_logger = logging.getLogger(__name__) + main_logger.addHandler(logging.StreamHandler(sys.stdout)) + + if conf_warn_msg: + main_logger.warning(conf_warn_msg) # --- provide models, pen and templates as fallbacks from package data ---- @@ -87,27 +176,23 @@ def main(argv=None): # --- load the process queue (error early) -------------------------------- - _steps = log_pipeline(config, log_manager=lm) + _steps = log_pipeline(config, logger=main_logger) process_queue = load_pipeline(config) # --- check symlink target ------------------------------------------------ - if args.symlink is not None: if not os.path.isdir(args.symlink): - lm.logger.error("error: --symlink {args.symlink} is not a directory") + main_logger.error("error: --symlink {args.symlink} is not a directory") return -1 - # --- create output directory and move log -------------------------------- - args.outdir = os.path.expanduser(args.outdir) - os.makedirs(args.outdir, exist_ok=True) - move_logging_file_handler(logging.getLogger(), args.outdir) - if BatchedResultFile.results_in_path(args.outdir): if args.force: - lm.logger.info("Previous run detected....overwriting (--force set)") + main_logger.info("Previous run detected....overwriting (--force set)") else: - lm.logger.info("Previous run detected....skipping completed (--force not set)") - + main_logger.info("Previous run detected....skipping completed (--force not set)") + # for writing the results after workers succeed. + mpm = multiprocessing.Manager() + # results only utilize the lock and sync list from mpm. mpm is not saved. results = BatchedResultFile(args.outdir, manager=mpm, batch_size=args.batch, @@ -143,9 +228,9 @@ def main(argv=None): pth = os.path.join(args.basepath, args.input_pattern[0]) files = glob.glob(pth, recursive=True) - lm.logger.info("-" * 80) + main_logger.info("-" * 80) num_files = len(files) - lm.logger.info(f"Number of files detected by pattern:\t{num_files}") + main_logger.info(f"Number of files detected by pattern:\t{num_files}") # --- start worker processes ---------------------------------------------- @@ -153,59 +238,53 @@ def main(argv=None): 'process_queue': process_queue, 'config': config, 'outdir': args.outdir, - 'log_manager': lm, - 'lock': mpm.Lock(), - 'shared_dict': mpm.dict(), + 'lock': dask.distributed.Lock('x'), # mpm.Lock(), # todo transit to Dask's Lock + PARAM_SHARE: mpm.dict(), # todo transit to Dask's Variable 'num_files': num_files, 'force': args.force, } + # init the dict of device assignment + _shared_state[PARAM_SHARE][KEY_ASSIGN] = mpm.dict() failed = mpm.list() - setup_plotting_backend(lm.logger) + setup_plotting_backend(main_logger) try: - if args.nprocesses > 1: - - with lm.logger_thread(): - print(args.nprocesses) - with multiprocessing.Pool(processes=args.nprocesses, - initializer=worker_setup, - initargs=(config,)) as pool: - try: - for idx, file_name in enumerate(files): - _ = pool.apply_async( - func=worker, - args=(idx, file_name), - kwds=_shared_state, - callback=partial(worker_success, result_file=results), - error_callback=partial(worker_error, failed=failed), - ) - - finally: - pool.close() - pool.join() - + if args.nprocesses > 0: + local_cluster = new_cluster('histoqc', is_cuda=is_cuda, + nprocesses=args.nprocesses, gpu_ids=args.gpu_ids, + nvlink=args.nvlink, spill_limit=args.spill_limit, + rmm_pool=args.rmm_pool, num_threads=args.num_threads) + with local_cluster: + with Client(local_cluster) as client: + # register the worker side + logging_setup.setup_client(client, forward_name="root") + + # 
noinspection PyTypeChecker + futures_list = [client.submit(worker, idx, file_name, **_shared_state) + for idx, file_name in enumerate(files)] + + for future in as_completed(futures_list): + try: + base_img_finished = future.result() + except Exception as exc: + worker_error(exc, failed) + else: + worker_success(base_img_finished, result_file=results) else: - for idx, file_name in enumerate(files): - try: - _success = worker(idx, file_name, **_shared_state) - except Exception as exc: - worker_error(exc, failed) - continue - else: - worker_success(_success, results) - + worker_setup() + worker_single_process(files, failed, results, **_shared_state) except KeyboardInterrupt: - lm.logger.info("-----REQUESTED-ABORT-----\n") + main_logger.info("-----REQUESTED-ABORT-----\n") else: - lm.logger.info("----------Done-----------\n") + main_logger.info("----------Done-----------\n") finally: - lm.logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log)," - " warnings are listed in warnings column in output") + main_logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log)," + " warnings are listed in warnings column in output") for file_name, error, tb in failed: - lm.logger.info(f"{file_name}\t{error}\n{tb}") + main_logger.info(f"{file_name}\t{error}\n{tb}") if args.symlink is not None: origin = os.path.realpath(args.outdir) @@ -215,13 +294,14 @@ def main(argv=None): ) try: os.symlink(origin, target, target_is_directory=True) - lm.logger.info("Symlink to output directory created") + main_logger.info("Symlink to output directory created") except (FileExistsError, FileNotFoundError): - lm.logger.error( + main_logger.error( f"Error creating symlink to output in '{args.symlink}', " f"Please create manually: ln -s {origin} {target}" ) return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/histoqc/_log_conf.py b/histoqc/_log_conf.py new file mode 100644 index 0000000..ae384b8 --- /dev/null +++ b/histoqc/_log_conf.py @@ -0,0 +1,205 @@ +from typing_extensions import TypedDict, NotRequired, Literal, Self +from typing import Dict, Type, Callable, Any, List, Optional, Sequence, get_args, cast +import logging.config + +FormattersDict = TypedDict('FormattersDict', { + 'format': str, + 'datefmt': str, + 'style': str, + 'validate': str, + 'defaults': str, + 'class': NotRequired[str], + +}, total=False) + +FilterDict = TypedDict("FilterDict", { + '()': Type[logging.Filter] | Callable, + 'args': NotRequired[Any], +}) + +HandlerDict = TypedDict("HandlerDict", { + "class": str, + "level": NotRequired[str], + "formatter": NotRequired[str], + "filters": NotRequired[List[str | logging.Filter]], + "filename": NotRequired[str], + "stream": NotRequired[str], + "mode": NotRequired[str], +}) + + +class LoggerDict(TypedDict, total=False): + level: str + propagate: bool + filters: List[str | logging.Filter] + handlers: List[str] + + +class ConfigDict(TypedDict): + + version: int + formatters: NotRequired[Dict[str, FormattersDict]] + filters: NotRequired[Dict[str, FilterDict]] + handlers: NotRequired[Dict[str, HandlerDict]] + loggers: NotRequired[Dict[str, LoggerDict]] + root: NotRequired[LoggerDict] + incremental: NotRequired[bool] # default False + disable_existing_loggers: NotRequired[bool] # default True + + +DEFAULT_CONSOLE = Literal['console'] +DEFAULT_FILE = Literal['file'] +TYPE_PREDEFINED_HANDLER = Literal[DEFAULT_CONSOLE, DEFAULT_FILE] + +DEFAULT_STD_OUT: str = 'ext://sys.stdout' +DEFAULT_LOG_FN: str = 'error.log' 
+DEFAULT_HANDLER_OUT_MAP: Dict[TYPE_PREDEFINED_HANDLER, str] = { + get_args(DEFAULT_CONSOLE)[0]: DEFAULT_STD_OUT, + get_args(DEFAULT_FILE)[0]: DEFAULT_LOG_FN, +} + +TYPE_FORMAT_DEFAULT = Literal['default'] +TYPE_FORMAT_SIMPLE = Literal['simple'] +TYPE_PREDEFINED_FORMATTER = Literal[TYPE_FORMAT_DEFAULT, TYPE_FORMAT_SIMPLE] + +PREDEFINED_FORMATTER_MAP: Dict[TYPE_PREDEFINED_FORMATTER, FormattersDict] = { + get_args(TYPE_FORMAT_DEFAULT)[0]: { + 'class': 'logging.Formatter', + 'format': '%(asctime)s - %(levelname)s - %(message)s' + }, + get_args(TYPE_FORMAT_SIMPLE)[0]: { + 'class': 'logging.Formatter', + 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + } +} + +LEVEL_DEBUG = Literal['DEBUG'] +LEVEL_INFO = Literal['INFO'] +LEVEL_WARNING = Literal['WARNING'] +LEVEL_ERROR = Literal['ERROR'] +LEVEL_CRITICAL = Literal['CRITICAL'] +LEVEL_NOTSET = Literal['NOTSET'] +LEVEL_TYPE = Literal[LEVEL_NOTSET, LEVEL_CRITICAL, LEVEL_ERROR, LEVEL_WARNING, LEVEL_INFO, LEVEL_DEBUG] + + +def default_init_helper(d: Dict, key: str, factory: Callable) -> Any: + if key in d: + return + d[key] = factory() + + +def init(attr: str, key: str, factory: Callable) -> Callable: + def decorator(func): + def wrapped(self, *args, **kwargs): + # Execute the helper function with self.some_attr, b, c + default_init_helper(getattr(self, attr), key, factory) + # Then execute the original function + return func(self, *args, **kwargs) + return wrapped + return decorator + + +class LoggerConfigBuilder: + """Builder to create log conf schema. + + Examples: + config_builder = LoggerConfigBuilder(version=1) + config = (config_builder + .add_formatter_by_type(formatter_type='simple') + .add_handler_by_type(handler_type='console', level='DEBUG', formatter_type='simple') + .add_handler_by_type(handler_type='logfile', level='ERROR', formatter_type='simple') + .add_logger(name='myapp', level='DEBUG', handlers=['console', 'logfile']) + .config) + """ + + def __init__(self, version: int): + self._config = ConfigDict(version=version, ) + + @staticmethod + def get_predefined_formatter(formatter_type) -> FormattersDict: + if formatter_type not in PREDEFINED_FORMATTER_MAP: + raise ValueError(f'Unknown predefined formatter: {formatter_type}') + return PREDEFINED_FORMATTER_MAP[formatter_type] + + @staticmethod + def get_predefined_handler(handler_type: TYPE_PREDEFINED_HANDLER, + level: LEVEL_TYPE = 'DEBUG', + formatter: str = 'simple', + target: Optional[str] = None, + mode: str = 'w') -> HandlerDict: + target = target if target is not None else DEFAULT_HANDLER_OUT_MAP[handler_type] + if handler_type == get_args(DEFAULT_CONSOLE)[0]: + return { + 'class': 'logging.StreamHandler', + 'level': level, + 'formatter': formatter, + 'stream': target + } + elif handler_type == get_args(DEFAULT_FILE)[0]: + return { + 'class': 'logging.FileHandler', + 'level': level, + 'formatter': formatter, + 'filename': 'app.log', + 'mode': mode, + } + raise ValueError(f'Unknown pre-defined handler type: {handler_type}') + + @staticmethod + def create_logger(*, level: LEVEL_TYPE = 'DEBUG', handlers: List[str] = ('console', ), + propagate: bool = False, + filters: Optional[List[str | logging.Filter]] = None) -> LoggerDict: + if filters is None: + filters = [] + return LoggerDict(level=level, handlers=handlers, propagate=propagate, filters=filters) + + @init("config", 'formatters', dict) + def add_formatter_by_type(self, *, formatter_type: TYPE_PREDEFINED_FORMATTER): + # default_init(self.config, 'formatters', dict) + self._config['formatters'][formatter_type] = 
self.get_predefined_formatter(formatter_type) + return self + + @init("config", 'formatters', dict) + def add_formatter(self, *, name: str, formatter_dict: FormattersDict): + # default_init(self.config, 'formatters', dict) + self._config['formatters'][name] = formatter_dict + return self + + @init("config", 'handlers', dict) + def add_handler(self, *, name: str, handler_dict: HandlerDict): + self._config['handlers'][name] = handler_dict + return self + + @init("config", 'handlers', dict) + def add_handler_by_type(self, *, handler_type: TYPE_PREDEFINED_HANDLER, level: LEVEL_TYPE, + formatter: str): + self._config['handlers'][handler_type] = self.get_predefined_handler(handler_type, level, formatter) + return self + + @init("config", 'loggers', dict) + def add_logger(self, *, name: str, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False, + filters: Optional[Sequence[str | logging.Filter]] = None) -> Self: + self._config['loggers'][name] = self.create_logger(level=level, handlers=handlers, + propagate=propagate, + filters=filters) + return self + + @init("config", 'root', dict) + def add_root(self, *, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False, + filters: Optional[Sequence[str | logging.Filter]] = None) -> Self: + self._config['root'] = self.create_logger(level=level, handlers=handlers, + propagate=propagate, + filters=filters) + return self + + @init("config", 'handlers', dict) + def set_handler_target(self, handler_name: str, out_field: str, out_value: str): + assert handler_name in self.config['handlers'] + handler_dict: HandlerDict = self.config['handlers'][handler_name] + out_field = cast(Literal['filename'], out_field) + handler_dict[out_field] = out_value + return self + + @property + def config(self): + return self._config diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py index 5012ce4..21c792f 100644 --- a/histoqc/_pipeline.py +++ b/histoqc/_pipeline.py @@ -7,16 +7,14 @@ import logging import multiprocessing import os -import platform import shutil -import threading import warnings from contextlib import ExitStack -from contextlib import contextmanager from contextlib import nullcontext from importlib import import_module from logging.config import dictConfig -from logging.handlers import QueueHandler +from typing_extensions import Literal +from typing import cast # --- logging helpers ------------------------------------------------- @@ -45,7 +43,7 @@ def setup_logging(*, capture_warnings, filter_warnings): 'handlers': { 'console': { 'class': 'logging.StreamHandler', - 'level': 'INFO', + 'level': 'DEBUG', # todo 'formatter': 'default', }, 'logfile': { @@ -63,7 +61,8 @@ def setup_logging(*, capture_warnings, filter_warnings): }) # configure warnings too... 
- warnings.filterwarnings(filter_warnings) + filter_type = Literal["default", "error", "ignore", "always", "module", "once"] + warnings.filterwarnings(cast(filter_type, filter_warnings)) logging.captureWarnings(capture_warnings) @@ -99,112 +98,24 @@ def move_logging_file_handler(logger, destination): logger.addHandler(new_handler) -class MultiProcessingLogManager: - - def __init__(self, logger_name, *, manager): - """create a MultiProcessingLogManager - - Note: this uses a multiprocessing Queue to correctly transfer - logging information from worker processes to the main - process logging instance - - Parameters - ---------- - logger_name : str - the name of the logger instance - manager : multiprocessing.Manager - the mp Manager instance used for creating sharable context - """ - self._logger_name = logger_name - self._log_queue = manager.Queue() - self._log_thread_active = False - - @property - def is_main_process(self): - return multiprocessing.current_process().name == "MainProcess" - - @property - def logger(self): - """returns the logger instance""" - if self.is_main_process: - return logging.getLogger(self._logger_name) - else: - root = logging.getLogger() - if not root.hasHandlers(): - qh = QueueHandler(self._log_queue) - root.setLevel(logging.INFO) - root.addHandler(qh) - # note: this should be revisited and set by the main process - warnings.filterwarnings('ignore') - logging.captureWarnings(True) - return root - - @contextmanager - def logger_thread(self): - """context manager for multiprocess logging - - Note: this starts the thread responsible for handing the log records - emitted by child processes to the main logger instance - """ - assert self.is_main_process - assert not self._log_thread_active # not reentrant... - self._log_thread_active = True - - def process_queue(q, ln): - main_logger = logging.getLogger(ln) - while True: - log_record = q.get() - if log_record is None: - break - main_logger.handle(log_record) - - lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name)) - lt.start() - try: - yield - finally: - self._log_queue.put(None) - lt.join() - self._log_thread_active = False - - -def log_pipeline(config, log_manager): +def log_pipeline(config, logger: logging.Logger): """log the pipeline information Parameters ---------- config : configparser.ConfigParser - log_manager : MultiProcessingLogManager + logger : logger obj to log the messages """ - assert log_manager.is_main_process + assert multiprocessing.current_process().name == "MainProcess" steps = config.get(section='pipeline', option='steps').splitlines() - log_manager.logger.info("the pipeline will use these steps:") + logger.info("the pipeline will use these steps:") for process in steps: mod_name, func_name = process.split('.') - log_manager.logger.info(f"\t\t{mod_name}\t{func_name}") + logger.info(f"\t\t{mod_name}\t{func_name}") return steps -# --- worker process helpers ------------------------------------------ - -def setup_plotting_backend(logger=None): - """loads the correct matplotlib backend - - Parameters - ---------- - logger : - the logging.Logger instance - """ - import matplotlib - if platform.system() != "Windows" and not os.environ.get('DISPLAY'): - if logger is not None: - logger.info('no display found. 
Using non-interactive Agg backend') - matplotlib.use('Agg') - else: - matplotlib.use('TkAgg') - - class BatchedResultFile: """BatchedResultFile encapsulates the results writing @@ -310,7 +221,7 @@ def write_headers(self, *args): Parameters ---------- - state : dict + state: dict the current histoqc implementation writes the outputs to the header files, so *args supports `state` for now. overwrite in subclass to control header output behavior diff --git a/histoqc/_worker.py b/histoqc/_worker.py index 97e5ff6..8c29d54 100644 --- a/histoqc/_worker.py +++ b/histoqc/_worker.py @@ -1,29 +1,36 @@ """histoqc worker functions""" +import logging import os import shutil - +import traceback from histoqc.BaseImage import BaseImage -from histoqc._pipeline import load_pipeline -from histoqc._pipeline import setup_plotting_backend +from histoqc._pipeline import BatchedResultFile +from histoqc._worker_setup import setup_plotting_backend +from typing import List, Optional +KEY_ASSIGN: str = 'device_assign' +PARAM_SHARE: str = 'shared_dict' # --- worker functions -------------------------------------------------------- -def worker_setup(c): +# c: configparser.Parser, cuda: bool, device_id_list: List[int], state: Dict +def worker_setup(): """needed for multiprocessing worker setup""" setup_plotting_backend() - load_pipeline(config=c) + # shared_dict = state[PARAM_SHARE] + # load_pipeline(config=c) + # device_assign(cuda, device_id_list, shared_dict) def worker(idx, file_name, *, - process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force): + process_queue, config, outdir, lock, shared_dict, num_files, force): """pipeline worker function""" - + logger = logging.getLogger() # --- output directory preparation -------------------------------- fname_outdir = os.path.join(outdir, os.path.basename(file_name)) if os.path.isdir(fname_outdir): # directory exists if not force: - log_manager.logger.warning( + logger.warning( f"{file_name} already seems to be processed (output directory exists)," " skipping. To avoid this behavior use --force" ) @@ -34,24 +41,35 @@ def worker(idx, file_name, *, # create output dir os.makedirs(fname_outdir) - log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") + logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") + # let Dask handle the device visibility/assignment + device_id = 0 # shared_dict[KEY_ASSIGN].get(os.getpid(), None) + # logger.info(f"{__name__} - {file_name}: Device ID: {dict(shared_dict[KEY_ASSIGN])}") + if device_id is None: + logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id." 
+ f"Default: use 0 for CUDA devices.") + s: Optional[BaseImage] = None try: - s = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage"))) - + s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")), + device_id=device_id) for process, process_params in process_queue: process_params["lock"] = lock process_params["shared_dict"] = shared_dict process(s, process_params) s["completed"].append(process.__name__) - except Exception as exc: # reproduce histoqc error string - _oneline_doc_str = exc.__doc__.replace('\n', '') + logger.info(f"{file_name}: Error Block") + if s is not None: + # s.image_handle.release() + s.image_handle.close() + # print(f"DBG: {__name__}: {exc}") + _oneline_doc_str = exc.__doc__.replace('\n', '') if exc.__doc__ is not None else '' err_str = f"{exc.__class__} {_oneline_doc_str} {exc}" - - log_manager.logger.error( - f"{file_name} - Error analyzing file (skipping): \t {err_str}" + trace_string = traceback.format_exc() + logger.error( + f"{file_name} - Error analyzing file (skipping): \t {err_str}. Traceback: {trace_string}" ) if exc.__traceback__.tb_next is not None: func_tb_obj = str(exc.__traceback__.tb_next.tb_frame.f_code) @@ -62,13 +80,11 @@ def worker(idx, file_name, *, raise exc else: - # TODO: - # the histoqc workaround below is due an implementation detail in BaseImage: - # BaseImage keeps an OpenSlide instance stored under os_handle and leaks a - # file handle. This will need fixing in BaseImage. - # -> best solution would be to make BaseImage a contextmanager and close - # and cleanup the OpenSlide handle on __exit__ - s["os_handle"] = None # need to get rid of handle because it can't be pickled + # So long as the gc is triggered to delete the handle, the close is called to release the resources, + # as documented in the openslide and cuimage's source code. 
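+        # A context-manager variant would make this cleanup automatic; an illustrative sketch only
+        # (contextlib.closing simply calls .close() on exit; not part of this change):
+        #     with contextlib.closing(s.image_handle):
+        #         ...  # run the pipeline steps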
+        # todo: should simply handle the __del__
+        s.image_handle.close()
+        # s.image_handle.handle = None
 
     return s
 
 
@@ -86,7 +102,7 @@ def worker_success(s, result_file):
 
         result_file.write_line("\t".join([_fields, _warnings]))
 
 
-def worker_error(e, failed):
+def worker_error(e, failed: List):
     """error callback"""
     if hasattr(e, '__histoqc_err__'):
         file_name, err_str, tb = e.__histoqc_err__
@@ -96,3 +112,21 @@ def worker_error(e, failed):
         # around the worker function
         file_name, err_str, tb = "N/A", f"error outside of pipeline {e!r}", None
     failed.append((file_name, err_str, tb))
+
+
+def worker_flow_for_file(idx: int, file_name: str,
+                         failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]:
+    try:
+        base_image = worker(idx, file_name, **kwargs)
+    except Exception as exc:
+        base_image = None
+        worker_error(exc, failed)
+    else:
+        worker_success(base_image, results)
+    return base_image
+
+
+def worker_single_process(files, failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]:
+    # process every file sequentially in this process; keep the last BaseImage for the caller
+    base_image = None
+    for idx, file_name in enumerate(files):
+        base_image = worker_flow_for_file(idx, file_name, failed, results, **kwargs)
+    return base_image
+
diff --git a/histoqc/_worker_setup.py b/histoqc/_worker_setup.py
new file mode 100644
index 0000000..0de9472
--- /dev/null
+++ b/histoqc/_worker_setup.py
@@ -0,0 +1,250 @@
+from __future__ import annotations
+import multiprocessing
+import platform
+
+import dask
+import threading
+import os
+from contextlib import contextmanager
+from logging.handlers import QueueHandler
+from typing import Dict, cast, Optional, List
+from typing_extensions import Literal, Self
+import warnings
+from dask.distributed import WorkerPlugin
+import logging
+from logging.config import dictConfig
+from dask.distributed import get_worker
+from histoqc._log_conf import LoggerConfigBuilder
+DEFAULT_LOG_FN = 'error.log'
+FMT_DFT = cast(Literal['default'], 'default')
+HDL_CONSOLE = 'console'
+HDL_FILE = 'file'
+HDL_OUT_FIELD = 'filename'
+MAIN_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
+                   .add_handler_by_type(handler_type=HDL_FILE, level='WARNING', formatter=FMT_DFT)
+                   .add_root(level="INFO", handlers=[HDL_FILE])
+                   .set_handler_target(handler_name=HDL_FILE, out_field=HDL_OUT_FIELD, out_value=DEFAULT_LOG_FN))
+
+WORKER_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
+                     .add_handler_by_type(handler_type=HDL_CONSOLE, level='INFO', formatter=FMT_DFT)
+                     .add_root(level="DEBUG", handlers=[HDL_CONSOLE]))  # HDL_CONSOLE
+
+
+def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore'):
+    # configure warnings too...
+    filter_type = Literal["default", "error", "ignore", "always", "module", "once"]
+    warnings.filterwarnings(cast(filter_type, filter_warnings))
+    logging.captureWarnings(capture_warnings)
+
+
+def setup_plotting_backend(logger=None, distributed: bool = False):
+    """loads the correct matplotlib backend
+
+    Parameters
+    ----------
+    logger :
+        the logging.Logger instance
+    distributed:
+        whether a distributed framework is applied; if so, the non-interactive Agg backend is forced
+    """
+    import matplotlib
+    if distributed or (platform.system() != "Windows" and not os.environ.get('DISPLAY')):
+        if logger is not None:
+            logger.info('no display found. Using non-interactive Agg backend')
+        matplotlib.use('Agg')
+    else:
+        matplotlib.use('TkAgg')
+
+
+class DaskLogHandler(logging.Handler):
+    """Custom Handler, which emits the topic/message from builtin logging.Logger to dask's centralized logging.
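+
+    A hypothetical attachment inside a worker process (sketch only):
+        logging.getLogger().addHandler(DaskLogHandler(topic='logs'))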
+ + Could be useful for future if we need to migrate from builtin logger to dask's logger + """ + topic: str + + def emit(self, record): + worker = get_worker() + log_entry = self.format(record) + worker.log_event(self.topic, log_entry) + + def __init__(self, topic: str = 'logs'): + super().__init__() + self.topic = topic + + +class WorkerInitializer(WorkerPlugin): + """Worker Initializer + + """ + worker_config: Dict + logger_names: List[Optional[str]] + + def __init__(self, worker_config: Dict, capture_warnings: bool = True, filter_warnings: str = 'ignore'): + self.worker_config = worker_config + self.capture_warnings = capture_warnings + self.filter_warnings = filter_warnings + + def setup(self, worker): + logging.config.dictConfig(self.worker_config) + handle_warning(capture_warnings=self.capture_warnings, filter_warnings=self.filter_warnings) + setup_plotting_backend(logging.getLogger(), distributed=True) + + @classmethod + def build(cls, worker_config: Dict, + capture_warnings: bool = True, filter_warnings: str = 'ignore') -> Self: + return cls(worker_config, capture_warnings, filter_warnings) + + +class WorkerSetup: + _plugin: WorkerPlugin + _main_build: LoggerConfigBuilder + _worker_build_list = List[LoggerConfigBuilder] + + def __init__(self, main_build: LoggerConfigBuilder, + *worker_builds: LoggerConfigBuilder, + capture_warnings: bool = True, filter_warnings: str = 'ignore'): + self._main_build = main_build + self._worker_build_list = list(worker_builds) + self._capture_warnings = capture_warnings + self._filter_warnings = filter_warnings + + def is_main_proc(self): + return multiprocessing.current_process().name == "MainProcess" + + @classmethod + def client_setup_plugin(cls, client: dask.distributed.Client, + conf_build: LoggerConfigBuilder, + capture_warnings: bool, + filter_warnings: str) -> dask.distributed.Client: + plugin = WorkerInitializer.build(conf_build.config, capture_warnings=capture_warnings, + filter_warnings=filter_warnings) + client.register_plugin(plugin) + return client + + def setup_client(self, client: dask.distributed.Client, forward_name: Optional[str]): + # todo fill-in + if not self.is_main_proc(): + return client + # main logger setup + for wb in self._worker_build_list: + client = self.client_setup_plugin(client, wb, self._capture_warnings, self._filter_warnings) + # forward to the main process logger + client.forward_logging(logger_name=forward_name) + return client + + def curate_filename(self, *, output_dir: Optional[str], fname: Optional[str], + handler_name: Optional[str], out_field: Optional[str]): + # do nothing if any of below is None: + if handler_name is None or fname is None or out_field is None: + return + dest = fname + if output_dir is not None: + os.makedirs(output_dir, exist_ok=True) + dest = os.path.join(output_dir, fname) + assert dest is not None and handler_name is not None and out_field is not None + self._main_build = self._main_build.set_handler_target(handler_name=handler_name, + out_field=out_field, out_value=dest) + return self._main_build + + def setup_mainprocess_logger(self, *, output_dir: Optional[str], + fname: Optional[str], + handler_name: Optional[str], + out_field: Optional[str]): + if not self.is_main_proc(): + return + # main logger + self._main_build = self.curate_filename(output_dir=output_dir, fname=fname, + handler_name=handler_name, out_field=out_field) + + logging.config.dictConfig(self._main_build.config) + + def setup(self, *, + client: dask.distributed.Client, + forward_name: Optional[str], + output_dir: 
Optional[str], + fname: str = DEFAULT_LOG_FN, + handler_name: Optional[str] = HDL_FILE, + out_field: Optional[str] = HDL_OUT_FIELD + ): + if not self.is_main_proc(): + return + self.setup_mainprocess_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field) + self.setup_client(client, forward_name) + + +class MultiProcessingLogManager: + """Adapted from Andreas Poehlmann's implementation. + + Under the hood of dask, worker loggers can be forwarded + to client directly for both local and distributed clusters. No need + + """ + def __init__(self, logger_name, *, manager): + """create a MultiProcessingLogManager + + Note: this uses a multiprocessing Queue to correctly transfer + logging information from worker processes to the main + process logging instance + + Parameters + ---------- + logger_name : str + the name of the logger instance + manager : multiprocessing.Manager + the mp Manager instance used for creating sharable context + """ + self._logger_name = logger_name + self._log_queue = manager.Queue() + self._log_thread_active = False + + @property + def is_main_process(self): + return multiprocessing.current_process().name == "MainProcess" + + @property + def logger(self): + """returns the logger instance""" + if self.is_main_process: + return logging.getLogger(self._logger_name) + else: + root = logging.getLogger() + if not root.hasHandlers(): + qh = QueueHandler(self._log_queue) + root.setLevel(logging.INFO) + root.addHandler(qh) + # note: this should be revisited and set by the main process + warnings.filterwarnings('ignore') + logging.captureWarnings(True) + return root + + @contextmanager + def logger_thread(self): + """context manager for multiprocess logging + + Note: this starts the thread responsible for handing the log records + emitted by child processes to the main logger instance + """ + assert self.is_main_process + assert not self._log_thread_active # not reentrant... 
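+        # typical use from the main process (sketch; `pool` and `jobs` are hypothetical):
+        #     with log_manager.logger_thread():
+        #         pool.starmap(worker, jobs)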
+        self._log_thread_active = True
+
+        def process_queue(q, ln):
+            main_logger = logging.getLogger(ln)
+            while True:
+                log_record = q.get()
+                if log_record is None:
+                    break
+                main_logger.handle(log_record)
+
+        lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name))
+        lt.start()
+        try:
+            yield self
+        finally:
+            self._log_queue.put(None)
+            lt.join()
+            self._log_thread_active = False
+
+# --- worker process helpers ------------------------------------------
+
diff --git a/histoqc/annotations/annot_collection.py b/histoqc/annotations/annot_collection.py
index 30a187c..42eab60 100644
--- a/histoqc/annotations/annot_collection.py
+++ b/histoqc/annotations/annot_collection.py
@@ -2,7 +2,7 @@ from types import MappingProxyType
 # from shapely.strtree import STRtree
 # from shapely.geometry import box as shapely_box
-from histoqc.import_wrapper.typing import Literal, get_args
+from typing_extensions import Literal, get_args
 from lazy_property import LazyProperty
 from .annotation.base import Annotation, Region, TYPE_RAW_LABEL
 from .annotation.imagescope import ImageScopeAnnotation
diff --git a/histoqc/annotations/annotation/base.py b/histoqc/annotations/annotation/base.py
index 36da725..d549e51 100644
--- a/histoqc/annotations/annotation/base.py
+++ b/histoqc/annotations/annotation/base.py
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar, Dict, Union, List, Tuple, TypedDict
+from typing import TypeVar, Dict, Union, List, Tuple, Generic
+from typing_extensions import TypedDict
 from lazy_property import LazyProperty
 from shapely.geometry import Polygon, MultiPolygon
 import logging
diff --git a/histoqc/annotations/annotation/geojson.py b/histoqc/annotations/annotation/geojson.py
index cf5bf2e..fc1175a 100644
--- a/histoqc/annotations/annotation/geojson.py
+++ b/histoqc/annotations/annotation/geojson.py
@@ -1,5 +1,5 @@
 from typing import List, Dict, Callable, Any  # Literal, get_args
-from histoqc.import_wrapper.typing import Literal, get_args
+from typing_extensions import Literal, get_args
 from ..io_utils.json import load_json
 from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET
diff --git a/histoqc/array_adapter/__init__.py b/histoqc/array_adapter/__init__.py
new file mode 100644
index 0000000..4ed63c5
--- /dev/null
+++ b/histoqc/array_adapter/__init__.py
@@ -0,0 +1,2 @@
+from .func_mapping import FUNC_MAP
+from .adapter import ArrayAdapter, ArrayDeviceType, Device
diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py
new file mode 100644
index 0000000..cd9767d
--- /dev/null
+++ b/histoqc/array_adapter/adapter.py
@@ -0,0 +1,476 @@
+from __future__ import annotations
+from histoqc.array_adapter.typing import TYPE_NP, TYPE_CP, TYPE_ARRAY
+from histoqc.array_adapter.func_mapping import FUNC_MAP
+import numpy as np
+from numbers import Number
+from typing import Callable, Mapping, Tuple, Optional, Any, Iterable, Dict
+from typing_extensions import Self, TypeGuard
+from histoqc.import_wrapper.cupy_extra import cupy as cp, cupy_installed
+from enum import Enum
+import logging
+import functools
+from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv
+import skimage
+import re
+import asyncio
+
+
+async def async_write_image(filename, arr: np.ndarray, **kwargs):
+    loop = asyncio.get_running_loop()
+    # run_in_executor only forwards positional args; bind the keyword args with functools.partial
+    await loop.run_in_executor(None, functools.partial(skimage.io.imsave, filename, arr, **kwargs))
+
+
+class ArrayDeviceType(Enum):
+    CPU: str = 'cpu'
+ CUDA: str = 'cuda' + + @classmethod + def from_bool(cls, on_cpu: bool): + value = 'cpu' if on_cpu else 'cuda' + return cls(value) + + @classmethod + def from_str(cls, str_val: str): + assert isinstance(str_val, str) + return cls(str_val) + + @classmethod + def build(cls, value: str | Self) -> Self: + if isinstance(value, cls): + return value + if isinstance(value, str): + return cls.from_str(value) + raise TypeError(f'Unexpected type {type(value)}') + + +class Device: + __device_type: ArrayDeviceType + __device_id: Optional[int] + __instances: Dict[Tuple[ArrayDeviceType, Optional[int]], Self] = dict() + _is_initialized: bool + DEFAULT_ID: int = 0 + + DEVICE_CPU: str = 'cpu' + DEVICE_CUDA: str = 'cuda' + + def is_cpu(self) -> bool: + return self.__device_type is ArrayDeviceType.CPU + + def is_cuda(self) -> bool: + return self.__device_type is ArrayDeviceType.CUDA + + @property + def device_type(self) -> ArrayDeviceType: + return self.__device_type + + @property + def device_id(self) -> Optional[int]: + return self.__device_id + + def __init__(self, device_type: ArrayDeviceType, device_id: Optional[int] = None) -> None: + if not hasattr(self, "_is_initialized") or not self._is_initialized: + self.__device_type = device_type + if self.is_cuda() and device_id is None: + device_id = Device.DEFAULT_ID + self.__device_id = device_id + self._is_initialized = True + + def __repr__(self) -> str: + dev_id = f":{self.device_id}" if self.device_type is ArrayDeviceType.CUDA else "" + return f"{self.device_type.value}{dev_id}" + + def __reduce__(self): + # Return a tuple representing the pickling state + return self.__class__, (self.__device_type, self.__device_id) + + def __new__(cls, device_type: ArrayDeviceType, device_id: Optional[int] = None): + device_id = device_id if device_type is ArrayDeviceType.CUDA else None + if device_type is ArrayDeviceType.CUDA and device_id is None: + device_id = cls.DEFAULT_ID + + key = (device_type, device_id) + if key not in cls.__instances: + inst = super().__new__(cls) + cls.__instances[key] = inst + return cls.__instances[key] + + @classmethod + def parse_input(cls, device: str | int) -> Tuple[ArrayDeviceType, Optional[int]]: + assert device is not None, f"device must not be None" + if isinstance(device, int): + return ArrayDeviceType.CUDA, device + + assert isinstance(device, str), f"device must either be int (GPU:device) or str (cpu|cuda)[:device]" + regex = r'^(cpu|cuda)(:(\d+))?$' + match = re.match(regex, device) + + assert match is not None and len(match.groups()) == 3, f"Unexpected input format: {device}" + groups = match.groups() + + if groups[0] == cls.DEVICE_CPU: + # Handle the "cpu" case + return ArrayDeviceType.CPU, None + elif groups[0] == cls.DEVICE_CUDA and groups[2] is None: + return ArrayDeviceType.CUDA, cls.DEFAULT_ID + elif groups[0] == cls.DEVICE_CUDA and groups[2] is not None: + device_id = int(groups[2]) + return ArrayDeviceType.CUDA, device_id + raise ValueError(f"Unexpected input format: {device}") + + @classmethod + def build(cls, device: str | int): + device_type, device_id = cls.parse_input(device) + return cls(device_type, device_id) + + +class ArrayAdapter(Callable): + + func_map: Mapping[Callable, Callable] + input_device: Optional[str | Device] + output_device: Optional[str | Device] + contingent_device: Device + + id: int + + @classmethod + def get_device(cls, arr: TYPE_ARRAY): + if not cls.is_array(arr): + logging.warning(f"{__name__} {type(arr)} is not an Array") + return None + if cls.is_numpy(arr): + return 
Device.build(Device.DEVICE_CPU)
+        assert cls.is_cupy(arr), f"Not a Cupy array: {type(arr)}"
+        device_id = arr.device.id
+        return Device(ArrayDeviceType.CUDA, device_id)
+
+    @staticmethod
+    def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]:
+        return isinstance(arr, np.ndarray)
+
+    @staticmethod
+    def is_cupy(arr) -> TypeGuard[TYPE_CP]:
+        return cupy_installed() and isinstance(arr, cp.ndarray)
+
+    @staticmethod
+    def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP:
+        if ArrayAdapter.is_numpy(arr) or isinstance(arr, Number):
+            return np.array(arr, copy=copy)
+        assert ArrayAdapter.is_cupy(arr)
+        # logging.debug(f"{__name__}: CUPY->NUMPY detected. Expect latency.")
+        return arr.get()
+
+    @staticmethod
+    def to_cupy(arr: TYPE_ARRAY | Number, device: Device, copy: bool = False) -> TYPE_CP:
+        assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \
+            f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}"
+        assert device is not None and isinstance(device, Device) and device.is_cuda(), f"{device} is not CUDA device"
+        if ArrayAdapter.is_cupy(arr) and arr.device.id == device.device_id and not copy:
+            # logging.warning(f"{__name__}: Same Device. Return self:"
+            #                 f" {device.device_id}. {cp.cuda.runtime.deviceGetPCIBusId(arr.device.id)}")
+            return arr
+        with cp.cuda.Device(device.device_id):
+            return cp.array(arr, copy=copy)
+
+    @staticmethod
+    def array_device_type(arr: TYPE_ARRAY) -> Device:
+        on_cpu = ArrayAdapter.is_numpy(arr)
+        if on_cpu:
+            return Device.build(Device.DEVICE_CPU)
+        assert cupy_installed() and ArrayAdapter.is_cupy(arr)
+        return Device.build(f"{Device.DEVICE_CUDA}:{arr.device.id}")
+
+    @staticmethod
+    def is_array(arr: TYPE_ARRAY) -> bool:
+        return ArrayAdapter.is_numpy(arr) or ArrayAdapter.is_cupy(arr)
+
+    @classmethod
+    def new_array(cls, arr: Any, array_device: Device) -> TYPE_ARRAY:
+        if cls.is_array(arr):
+            return cls._move_to_device(arr, array_device, copy=True)
+        if isinstance(arr, Iterable):
+            arr = cls.curate_arrays_device(*arr, device=array_device, copy=True)
+        if array_device.is_cuda():
+            assert cupy_installed()
+            with cp.cuda.Device(array_device.device_id):
+                return cp.asarray(arr)
+        elif array_device.is_cpu():
+            return np.asarray(arr)
+        raise TypeError(f'Unexpected device {array_device}')
+
+    def asarray(self, arr: Any) -> TYPE_ARRAY:
+        return self.__class__.new_array(arr, self.output_device)
+
+    @classmethod
+    def _move_to_device(cls,
+                        arr: Optional[TYPE_ARRAY],
+                        device: Optional[Device], copy: bool = False) -> TYPE_ARRAY:
+        # structural match > py3.10
+        if device is None or not cls.is_array(arr):
+            logging.debug("Input is not an array. Return unchanged.")
+            return arr
+        assert device is not None
+        if device.is_cpu():
+            logging.debug(f"Move to CPU. InputType: {type(arr)}. Input Device: {cls.get_device(arr)},"
+                          f" shape{arr.shape}")
+            return ArrayAdapter.to_numpy(arr, copy=copy)
+        elif device.is_cuda():
+            logging.debug(f"Move to GPU: {device}. InputType: {type(arr)}. 
Input Device: {cls.get_device(arr)}" + f" shape{arr.shape}") + return ArrayAdapter.to_cupy(arr, device, copy=copy) + raise ValueError(f"Unsupported device: {device}") + + def sync(self, arr: Optional[TYPE_ARRAY | Number], copy: bool = False) -> TYPE_ARRAY: + if not self.__class__.is_array(arr): + return arr + return self.__class__._move_to_device(arr, device=self.output_device, copy=copy) + + @classmethod + def curate_device_helper(cls, arr: TYPE_ARRAY, device: Optional[Device], copy: bool): + if arr is not None and cls.is_array(arr): + arr = cls._move_to_device(arr, device, copy=copy) + return arr + + @classmethod + def curate_arrays_device(cls, *arrays: TYPE_ARRAY, + device: Optional[Device], + copy: bool = False) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + """Curate the device type of one or more arrays. + + Args: + *arrays: + device: + copy: + + Returns: + + """ + # already an array - no need to recursively unpack + if cls.is_array(arrays): + return cls.curate_device_helper(arrays, device, copy) + # not array and not iterable --> unpack + if not isinstance(arrays, Iterable): + return arrays + # only one input + if len(arrays) == 1: + return cls.curate_device_helper(arrays[0], device=device, copy=copy) + out_list = [] + for o in arrays: + out_list.append(cls.curate_device_helper(o, device=device, copy=copy)) + return tuple(out_list) + + @staticmethod + def get_api(cpu_func: Callable, + func_map: Mapping[Callable, Callable], + device: Optional[Device]) -> Tuple[Callable, Device]: + if device is None: + logging.debug(f"Device unspecified in both input data and input device. Try: gpu:0") + device = Device.build(Device.DEVICE_CUDA) + if device.is_cpu(): + return cpu_func, device + assert device.is_cuda() + mapped = func_map.get(cpu_func, None) + if mapped is not None: + return mapped, device + # if not implemented + func_name = getattr(cpu_func, '__qualname__', cpu_func.__name__) + logging.info(f"{__name__}: {func_name} does not have a GPU implementation. Revert to CPU") + return cpu_func, Device.build(Device.DEVICE_CPU) + + @classmethod + def call_helper(cls, func_in: Optional[TYPE_ARRAY], func_device: Device, func: Callable, *curated_args, + **curated_kwargs): + if cls.is_cupy(func_in) or (func_in is None and func_device.is_cuda()): + assert func_in is None or func_in.device.id == func_device.device_id, \ + f"{func_device} mismatch {func_in is None or func_in.device}" + with cp.cuda.Device(func_device.device_id): + return func(func_in, *curated_args, **curated_kwargs) + else: + return func(func_in, *curated_args, **curated_kwargs) + + @classmethod + def unified_call(cls, + cpu_func: Callable, + func_map: Mapping[Callable, Callable], + input_device: Optional[Device], + output_device: Optional[Device], + data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: + # use input_device to override the current device, if not None + data = cls.curate_arrays_device(data, device=input_device, copy=False) + # if data is None --> use input device. + # if input_device is None, by default will invoke GPU interface + input_type = cls.array_device_type(data) if data is not None else input_device + # attempt to fetch the op, revert to CPU if GPU impl is not available (func=GPU impl, func_device=cuda + func, func_device = cls.get_api(cpu_func, func_map, input_type) + logging.debug(f"{__name__}: Call Adapter for {cpu_func} with " + f"In Device: {input_device}, Out Device: {output_device}." 
+ f"Mapped to: {func} and actual input device: {func_device}") + func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=False) + + curated_args = cls.curate_arrays_device(*args, device=func_device, copy=False) + curated_kwargs = dict() + for k, v in kwargs.items(): + curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=False) + + output = cls.call_helper(func_in, func_device, func, *curated_args, **curated_kwargs) + # only move the output around if the output is an array + if isinstance(output, tuple): + return cls.curate_arrays_device(*output, device=output_device, copy=False) + return cls.curate_arrays_device(output, device=output_device, copy=False) + + @classmethod + def _validate_device(cls, device: Optional[Device | str | int]) -> Optional[Device]: + if device is None: + return None + if isinstance(device, (str, int)): + device = Device.build(device) + + assert isinstance(device, Device), f"{type(device)}" + + if device.is_cpu(): + return device + + assert device.is_cuda(), f"Unsupported device_type: {device}" + if not cupy_installed(): + logging.info(f"Cupy is not installed. Revert to CPU") + return Device.build(Device.DEVICE_CPU) + return device + + def __init__(self, + input_device: Optional[str | int | Device], + output_device: Optional[str | int | Device], + func_map: Mapping[Callable, Callable], + contingent_device: Device, + ): + self.input_device = self.__class__._validate_device(input_device) + self.output_device = self.__class__._validate_device(output_device) + self.func_map = func_map + self.contingent_device = contingent_device + + @classmethod + def build(cls, + input_device: Optional[str | int | Device], + output_device: Optional[str | int | Device], + func_map: Mapping[Callable, Callable] = FUNC_MAP, + contingent_device: Device = None) -> Self: + if contingent_device is None: + contingent_device = output_device + return cls(input_device=input_device, output_device=output_device, func_map=func_map, + contingent_device=contingent_device) + + def apply(self, /, cpu_func: Callable, data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY: + return self.unified_call(cpu_func, self.func_map, self.input_device, self.output_device, + data, *args, **kwargs) + + def __call__(self, cpu_func: Callable) -> Callable: + return functools.partial(self.apply, cpu_func) + + @staticmethod + def __sync_device_output_helper(*arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + assert len(arrays) > 0 + if len(arrays) == 1: + return arrays[0] + return arrays + + @classmethod + def device_sync_all_helper(cls, *arrays: TYPE_ARRAY, + array_device: Optional[Device], + contingent_device: Optional[Device]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]: + assert isinstance(arrays, tuple), f"input check. 
{type(arrays)} is not a tuple"
+        if array_device is not None:
+            # unpack so a single input maps to a single output rather than a 1-tuple
+            return cls.__sync_device_output_helper(*(cls._move_to_device(arr, array_device) for arr in arrays))
+        assert array_device is None
+        has_contingent_device = any(cls.array_device_type(arr) is contingent_device for arr in arrays)
+        if has_contingent_device:
+            assert contingent_device is not None
+            return cls.device_sync_all_helper(*arrays,
+                                              array_device=contingent_device,
+                                              contingent_device=None)
+        return cls.__sync_device_output_helper(*arrays)
+
+    def device_sync_all(self, *arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        """Synchronize the devices of all given arrays to the adapter's output device.
+
+        Falls back to the contingent device if the output device is unset and any input
+        already resides on the contingent device.
+
+        Args:
+            *arrays: arrays to synchronize
+
+        Returns:
+            the synchronized array (single input) or tuple of arrays (multiple inputs)
+        """
+        return self.__class__.device_sync_all_helper(*arrays, array_device=self.output_device,
+                                                     contingent_device=self.contingent_device)
+
+    @classmethod
+    def binary_operation(cls, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY,
+                         input_device: Optional[Device],
+                         output_device: Optional[Device],
+                         contingent_device: Optional[Device],
+                         op: Callable) -> TYPE_ARRAY:
+        arr1, arr2 = cls.device_sync_all_helper(arr1, arr2, array_device=input_device,
+                                                contingent_device=contingent_device)
+        result: TYPE_ARRAY = op(arr1, arr2)
+        return cls.curate_arrays_device(result, device=output_device, copy=True)
+
+    def and_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=and_)
+
+    def or_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=or_)
+
+    def add(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=add)
+
+    def sub(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=sub)
+
+    def mul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=mul)
+
+    def matmul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=matmul)
+
+    def truediv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=truediv)
+
+    def floordiv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=floordiv)
+
+    def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_device=self.contingent_device,
+                                               op=xor)
+
+    @classmethod
+ def imsave(cls, filename: str, arr: TYPE_ARRAY, asynch: bool = True, **kwargs): + logging.debug(f"{__name__}: SHAPE DBG {arr.shape}") + arr = cls.curate_arrays_device(arr, device=Device.build(Device.DEVICE_CPU), copy=True) + logging.debug(f"{__name__}: TYPE DBG {type(arr)}") + if not asynch: + skimage.io.imsave(filename, arr, **kwargs) + return + asyncio.run(async_write_image(filename, arr, **kwargs)) diff --git a/histoqc/array_adapter/array_api_compat.py b/histoqc/array_adapter/array_api_compat.py new file mode 100644 index 0000000..4527a6e --- /dev/null +++ b/histoqc/array_adapter/array_api_compat.py @@ -0,0 +1,4 @@ +try: + import array_api_compat +except ImportError: + ... diff --git a/histoqc/array_adapter/func_mapping.py b/histoqc/array_adapter/func_mapping.py new file mode 100644 index 0000000..8ecc0ca --- /dev/null +++ b/histoqc/array_adapter/func_mapping.py @@ -0,0 +1,60 @@ +import scipy.ndimage +import skimage +import sklearn +from typing import Callable, Mapping +from scipy import ndimage as sci_ndi +import numpy as np +try: + import cupy as cp + from scipy import signal as sci_signal + from cupyx.scipy import signal as cu_signal + from cucim import skimage as cu_skimage + from cupyx.scipy import ndimage as cu_ndi + from sklearn import naive_bayes as sk_naive_bayes + from cuml import naive_bayes as cuml_naive_bayes + import cuml + from cuml import ensemble as cuml_ensemble + from sklearn import ensemble as sk_ensemble + + # noinspection PyUnresolvedReferences + FUNC_MAP: Mapping[Callable, Callable] = { + skimage.color.convert_colorspace: cu_skimage.color.convert_colorspace, + skimage.color.rgb2gray: cu_skimage.color.rgb2gray, + skimage.color.separate_stains: cu_skimage.color.separate_stains, + skimage.exposure.equalize_hist: cu_skimage.exposure.equalize_hist, + # not implemented + # skimage.feature.graycomatrix: cu_skimage.feature.graycomatrix, + # skimage.feature.local_binary_pattern: cu_skimage.feature.local_binary_pattern, + skimage.filters.frangi: cu_skimage.filters.frangi, + skimage.filters.gaussian: cu_skimage.filters.gaussian, + skimage.filters.gabor_kernel: cu_skimage.filters.gabor_kernel, + skimage.filters.gabor: cu_skimage.filters.gabor, + skimage.filters.laplace: cu_skimage.filters.laplace, + skimage.filters.median: cu_skimage.filters.median, + + # skimage.filters.rank.otsu: cu_skimage.filters.rank.otsu, + skimage.filters.sobel: cu_skimage.filters.sobel, + skimage.filters.threshold_otsu: cu_skimage.filters.threshold_otsu, + skimage.measure.regionprops: cu_skimage.measure.regionprops, + skimage.morphology.binary_opening: cu_skimage.morphology.binary_opening, + # the morphology.label is just an alias of the measure.label + skimage.morphology.label: cu_skimage.measure.label, + skimage.morphology.dilation: cu_skimage.morphology.dilation, + skimage.morphology.disk: cu_skimage.morphology.disk, + skimage.morphology.remove_small_holes: cu_skimage.morphology.remove_small_holes, + skimage.morphology.remove_small_objects: cu_skimage.morphology.remove_small_objects, + skimage.transform.resize: cu_skimage.transform.resize, + skimage.util.img_as_bool: cu_skimage.util.img_as_bool, + skimage.util.img_as_ubyte: cu_skimage.util.img_as_ubyte, + sci_ndi.convolve: cu_ndi.convolve, + + # can be replaced by erosion, but is actually slower for uint dtypes + skimage.filters.rank.minimum: None, # cu_skimage.morphology.erosion, + sci_signal.convolve2d: cu_signal.convolve2d, + sci_ndi.generate_binary_structure: cu_ndi.generate_binary_structure, + np.digitize: cp.digitize, + # 
sk_naive_bayes.GaussianNB: cuml_naive_bayes.GaussianNB, + # sk_ensemble.RandomForestClassifier: cuml_ensemble.RandomForestClassifier, + } +except ImportError: + FUNC_MAP = dict() diff --git a/histoqc/array_adapter/implementation.py b/histoqc/array_adapter/implementation.py new file mode 100644 index 0000000..e69de29 diff --git a/histoqc/array_adapter/typing.py b/histoqc/array_adapter/typing.py new file mode 100644 index 0000000..900ceac --- /dev/null +++ b/histoqc/array_adapter/typing.py @@ -0,0 +1,13 @@ +from __future__ import annotations +import numpy as np +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # for forward reference only -- annotate the objects from the optional cupy dependency + # noinspection PyUnresolvedReferences + import cupy as cp + + +TYPE_NP = np.ndarray +TYPE_CP = "cp.ndarray" +TYPE_ARRAY = "np.ndarray | cp.ndarray" diff --git a/histoqc/config/__main__.py b/histoqc/config/__main__.py index 3f2b1c0..493e09e 100644 --- a/histoqc/config/__main__.py +++ b/histoqc/config/__main__.py @@ -9,7 +9,7 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] - parser = argparse.ArgumentParser(prog="histoqc.config",description="Show example configuration files") + parser = argparse.ArgumentParser(prog="histoqc.config", description="Show example configuration files") parser.add_argument('--list', action='store_true', help='list available configs') diff --git a/histoqc/config/config.ini b/histoqc/config/config.ini index 0ee89bd..86872de 100644 --- a/histoqc/config/config.ini +++ b/histoqc/config/config.ini @@ -26,6 +26,7 @@ steps= BasicModule.getBasicStats [BaseImage.BaseImage] image_work_size = 1.25x +handles = openslide #not yet implemented confirm_base_mag: False diff --git a/histoqc/import_wrapper/__init__.py b/histoqc/import_wrapper/__init__.py index e69de29..e216514 100644 --- a/histoqc/import_wrapper/__init__.py +++ b/histoqc/import_wrapper/__init__.py @@ -0,0 +1 @@ +from .helper import dynamic_import diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py new file mode 100644 index 0000000..02eb76e --- /dev/null +++ b/histoqc/import_wrapper/cupy_extra.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +try: + import cupy + # cupy.cuda.set_allocator(None) +except ImportError: + cupy = None +finally: + cp = cupy + + +def cupy_installed() -> bool: + try: + import cupy + return True + except ImportError: + return False diff --git a/histoqc/import_wrapper/dask_cuda.py b/histoqc/import_wrapper/dask_cuda.py new file mode 100644 index 0000000..8b98acf --- /dev/null +++ b/histoqc/import_wrapper/dask_cuda.py @@ -0,0 +1,12 @@ +try: + import dask_cuda +except ImportError: + dask_cuda = None + + +def dask_cuda_installed() -> bool: + try: + import dask_cuda + return True + except ImportError: + return False diff --git a/histoqc/import_wrapper/helper.py b/histoqc/import_wrapper/helper.py index 7804e3a..aa97227 100644 --- a/histoqc/import_wrapper/helper.py +++ b/histoqc/import_wrapper/helper.py @@ -1,23 +1,84 @@ +from __future__ import annotations import importlib -from typing import Union +from typing import Optional, List -def dynamic_import(module_name: str, attribute_name: str, surrogate: Union[str, None]): - """ - Dynamically import the components from surrogate module if not available (e.g., `Literal` is only available in - typing from python3.8 but typing_extension provides the same functionality for python <=3.7. 
+def __dynamic_import(module_name: str, attribute_name: Optional[str]):
+    """Base function to import a module or a component from the module via importlib
+
     Args:
-        module_name:
-        attribute_name:
-        surrogate:
+        module_name: name of the module
+        attribute_name: name of the attribute. If None, then returns the module itself.
 
     Returns:
+        imported module or component
+
+    Raises:
+        ImportError: if the module cannot be imported or the attribute is not found.
     """
+    assert module_name is not None and isinstance(module_name, str)
     module = importlib.import_module(module_name)
-    attribute = getattr(module, attribute_name, None)
-    if attribute is not None:
-        return attribute
-    if surrogate is not None:
-        return dynamic_import(surrogate, attribute_name, None)
-    raise ImportError(f"Cannot Import {attribute_name} from either {module_name} or {surrogate}")
+    if attribute_name is None:
+        return module
+    if not hasattr(module, attribute_name):
+        raise ImportError(f"'{module_name}' has no attribute '{attribute_name}'")
+    return getattr(module, attribute_name)
+
+
+def __validate_names(names: str | List[str], pad_length: Optional[int] = 1) -> List[Optional[str]]:
+    """Validate the names provided to be not None and in form of a list.
+
+    If the names is a str, returns a list with a singleton value of names and length of pad_length.
+
+    Args:
+        names: the value to be validated
+        pad_length: length to pad the resulting list to when a single name is given
+
+    Returns:
+        List of names.
+    """
+    if isinstance(names, str) or names is None:
+        return [names] * pad_length
+    if isinstance(names, list) and len(names) == 1 and pad_length > 1:
+        return names * pad_length
+    assert isinstance(names, list), f"{type(names)}"
+    return names
+
+
+def dynamic_import(module_names: List[str] | str,
+                   attr_names: Optional[List[Optional[str]] | str],
+                   return_first: bool):
+    """Dynamically import the module or attribute from the modules via importlib.
+
+    Priority is defined by the order of module_names/attr_names. The function keeps trying the given
+    modules in order and raises an ImportError only if all attempts fail.
+    If return_first is True, the function only returns the first viable module/attribute. Otherwise,
+    it returns the list of all available modules/attributes.
+
+    Args:
+        module_names: names of the modules. If it is a str, it will be padded to a single-element list: [names]
+        attr_names: names of the attributes. If None, then only the module is imported.
+            If it is a str or a single-element list, it will be padded to a list of the same value
+            with the same length as module_names.
+        return_first: If return_first is True, the function only returns the first viable module/attribute.
+
+    Returns:
+        imported module or attribute
+
+    Raises:
+        ImportError: if the module cannot be imported or the attribute is not found.
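+
+    Examples:
+        Resolving the first available WSI handle class, preferring cucim over openslide
+        (module and class names as registered in histoqc.wsi_handles.constants):
+
+            handle_cls = dynamic_import(["histoqc.wsi_handles.cuimage_handle",
+                                         "histoqc.wsi_handles.openslide_handle"],
+                                        ["CuImageHandle", "OpenSlideHandle"],
+                                        return_first=True)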
+ """ + module_names = __validate_names(module_names) + attr_names = __validate_names(attr_names, pad_length=len(module_names)) + assert len(module_names) == len(attr_names) + out_list = [] + for module, attr in zip(module_names, attr_names): + try: + result = __dynamic_import(module, attr) + if return_first: + return result + out_list.append(result) + except ImportError: + continue + if len(out_list) == 0: + raise ImportError(f"Cannot Import from: {module_names}, {attr_names}") + return out_list diff --git a/histoqc/import_wrapper/openslide.py b/histoqc/import_wrapper/openslide.py index aaa007f..2e7c67c 100644 --- a/histoqc/import_wrapper/openslide.py +++ b/histoqc/import_wrapper/openslide.py @@ -1,9 +1,16 @@ +""" +For python >=3.8, the behavior of import dlls in Windows is changed. add_dll_directory is added to os and must be +manually called to include the path(s) of binaries. (in previous versions, extending the PATH variable is enough) +""" import os if hasattr(os, "add_dll_directory"): # specify your own openslide binary locations with os.add_dll_directory(os.path.join(os.getcwd(), 'bin')): + # noinspection PyUnresolvedReferences import openslide else: # os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] # #can either specify openslide bin path in PATH, or add it dynamically + # noinspection PyUnresolvedReferences import openslide + diff --git a/histoqc/import_wrapper/typing.py b/histoqc/import_wrapper/typing.py deleted file mode 100644 index 4e4ed03..0000000 --- a/histoqc/import_wrapper/typing.py +++ /dev/null @@ -1,6 +0,0 @@ -from .helper import dynamic_import -from typing import Type, Callable, Tuple, Any, TypeVar - -__TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] -Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") -get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions") diff --git a/histoqc/tests/test_pipeline_cli.py b/histoqc/tests/test_pipeline_cli.py index 244a075..2ebcf9d 100644 --- a/histoqc/tests/test_pipeline_cli.py +++ b/histoqc/tests/test_pipeline_cli.py @@ -56,6 +56,7 @@ def minimal_config(tmp_path_factory): [BaseImage.BaseImage] image_work_size = 1.25x + handles = openslide #three options: relative2mask, absolute, relative2image mask_statistics = relative2mask @@ -87,7 +88,8 @@ def test_cli_multiprocess_batching(multi_svs_dir, tmp_path, minimal_config, tmp_ '--symlink', os.fspath(spth), '*.svs' ]) == 0 - assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log', 'results_0.tsv', 'results_1.tsv']) + assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log', + 'results_0.tsv', 'results_1.tsv']) @pytest.fixture(scope='module') diff --git a/histoqc/tests/test_ui_cli.py b/histoqc/tests/test_ui_cli.py index c72aa4e..ff91739 100644 --- a/histoqc/tests/test_ui_cli.py +++ b/histoqc/tests/test_ui_cli.py @@ -1,8 +1,6 @@ import os import threading import time -from http.server import HTTPServer -from typing import Optional import pytest import requests diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py new file mode 100644 index 0000000..4a0e6ab --- /dev/null +++ b/histoqc/wsi_handles/base.py @@ -0,0 +1,382 @@ +from __future__ import annotations +from abc import ABC, abstractmethod + +from histoqc.import_wrapper import dynamic_import +import logging +from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic, Optional, cast +import numpy as np +from PIL.Image import Image as PILImage +from 
typing_extensions import final +from histoqc.array_adapter import ArrayDeviceType, ArrayAdapter, Device +import os + +from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER + +T = TypeVar('T') +Backend = TypeVar('Backend') +ARRAY = TypeVar('ARRAY') + + +class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]): + + handle: T + fname: str + _adapter: ArrayAdapter + _device: Device + _num_threads: int + + @property + def device(self) -> Device: + return self._device + + @staticmethod + def curate_shorter_edge(width, height, limit, aspect_ratio): + """Simulate the PIL.Image.Image.thumbnail approach to curate the size. + + The target size should preserve the aspect ratio. + + Args: + width: + height: + limit: + aspect_ratio: + + Returns: + + """ + if height > width: + # limit the shorter one + width = max(width, limit) + height = round(width / aspect_ratio) + else: + height = max(height, limit) + width = round(height * aspect_ratio) + return width, height + + @staticmethod + def curate_to_max_dim(width, height, max_size, aspect_ratio): + """Set the longer one of width and height to max_size while preserving the aspect ratio. + + Args: + width: + height: + max_size: + aspect_ratio: + + Returns: + width, height tuple + """ + if height > width: + height = max_size + width = round(height * aspect_ratio) + else: + width = max_size + height = round(width / aspect_ratio) + return width, height + + @property + @abstractmethod + def associated_images(self) -> Mapping[str, Backend]: + ... + + @property + @abstractmethod + def background_color(self) -> str: + ... + + @property + @abstractmethod + def bounding_box(self) -> Tuple[int, int, int, int]: + ... + + @property + @abstractmethod + def has_bounding_box(self) -> bool: + ... + + @property + @abstractmethod + def dimensions(self) -> Tuple[int, int]: + """ + + Returns: + (width, height) tuple + """ + ... + + @property + @abstractmethod + def magnification(self) -> str: + ... + + @property + @abstractmethod + def level_count(self) -> int: + ... + + @property + @abstractmethod + def level_dimensions(self) -> Sequence[Tuple[int, int]]: + ... + + @property + @abstractmethod + def level_downsamples(self) -> Sequence[float]: + ... + + @property + @abstractmethod + def vendor(self) -> str: + ... + + @property + @abstractmethod + def mpp_x(self) -> str: + ... + + @property + @abstractmethod + def mpp_y(self) -> str: + ... + + @property + @abstractmethod + def comment(self) -> str: + ... + + @abstractmethod + def get_thumbnail(self, new_dim) -> PILImage: + ... + + @abstractmethod + def backend_rgba2rgb(self, img) -> Backend: + """Remove the alpha channel with a predefined background color blended into the image. + + Args: + img: + + Returns: + R + """ + ... + + @abstractmethod + def region_backend(self, location, level, size, **kwargs): + ... + + @staticmethod + @abstractmethod + def backend_to_pil(region: Union[Backend, ARRAY]) -> PILImage: + ... + + @staticmethod + @abstractmethod + def array_to_numpy(arr: ARRAY) -> np.ndarray: + ... + + @staticmethod + @abstractmethod + def backend_dim(region: Backend) -> Tuple[int, int]: + """ + Defines the unified interface to obtain BACKEND handle type. + Args: + region: + + Returns: + + """ + ... + + @staticmethod + @abstractmethod + def array_shape(arr: ARRAY) -> Tuple[int, int]: + ... + + @staticmethod + @abstractmethod + def backend_to_array(region: Union[Backend, ARRAY], device: Optional[Device]) -> ARRAY: + ... 
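+
+    # read_region (below) composes the backend pieces: region_backend fetches the raw tile and
+    # backend_to_pil converts it, e.g. (hypothetical arguments):
+    #     thumb = handle.read_region(location=(0, 0), level=0, size=(512, 512))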
+
+    def read_region(self, location, level, size, **kwargs) -> PILImage:
+        region = self.region_backend(location=location, level=level, size=size, **kwargs)
+        return self.__class__.backend_to_pil(region)
+
+    @abstractmethod
+    def read_label(self) -> Backend:
+        ...
+
+    @abstractmethod
+    def read_macro(self) -> Backend:
+        ...
+
+    @classmethod
+    @abstractmethod
+    def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int], device: Optional[Device]) -> ARRAY:
+        ...
+
+    @abstractmethod
+    def get_best_level_for_downsample(self, downsample_factor: float):
+        ...
+
+    def curated_best_level_for_downsample(self, downsample_factor: float) -> Tuple[int, bool]:
+        relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in self.level_downsamples]
+        level = np.where(relative_down_factors_idx)[0]
+        if level.size:
+            return cast(int, level[0]), True
+        return self.get_best_level_for_downsample(downsample_factor), False
+
+    @staticmethod
+    @abstractmethod
+    def grid_stack(grid: List[List[ARRAY]], device: Optional[Device]) -> ARRAY:
+        ...
+
+    def resize_tile_downward(self, target_downsampling_factor, level,
+                             win_size: int = 2048, **read_region_kwargs) -> ARRAY:
+
+        (bx, by, bwidth, bheight) = self.bounding_box
+        end_x = bx + bwidth
+        end_y = by + bheight
+
+        closest_downsampling_factor = self.level_downsamples[level]
+
+        # create a new img
+        grid = []
+        for x in range(bx, end_x, win_size):
+            row_piece = []
+            for y in range(by, end_y, win_size):
+                win_width, win_height = [win_size] * 2
+                # Adjust extraction size for endcut
+                if end_x < x + win_width:
+                    win_width = end_x - x
+                if end_y < y + win_height:
+                    win_height = end_y - y
+
+                win_down_width = int(round(win_width / target_downsampling_factor))
+                win_down_height = int(round(win_height / target_downsampling_factor))
+
+                win_width = int(round(win_width / closest_downsampling_factor))
+                win_height = int(round(win_height / closest_downsampling_factor))
+
+                # TODO Note: this isn't very efficient; if more efficiency is needed,
+
+                # TODO cont. separate the public interface read_region -> PIL.Image from the internal data backend
+                # TODO (data_from_region)
+                # TODO e.g., cupy is far more efficient for resize w/ interpolation and antialiasing.
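+                # TODO      a GPU-side sketch of that idea (cupy / cucim.skimage, as used by
+                # TODO      CuImageHandle.region_resize_arr; illustrative only):
+                #     arr = cp.asarray(region)
+                #     out = c_skimage.transform.resize(arr, (win_down_height, win_down_width), anti_aliasing=True)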
+ closest_region = self.region_backend(location=(x, y), level=level, size=(win_width, win_height), + **read_region_kwargs) + if np.shape(closest_region)[-1] == 4: + closest_region = self.backend_rgba2rgb(closest_region) + closest_region_arr = self.__class__.backend_to_array(closest_region, self.device) + target_region = self.__class__.region_resize_arr(closest_region_arr, + (win_down_width, win_down_height), + device=self.device) + row_piece.append(target_region) + # row_piece = np.concatenate(row_piece, axis=0) + grid.append(row_piece) + # grid = np.concatenate(output, axis=1) + # + return self.__class__.grid_stack(grid, device=self.device) + + def best_thumb(self, x: int, y: int, dims: Tuple[int, int], + target_sampling_factor: float, **read_region_kwargs) -> ARRAY: + + # get thumb from og + if not self.has_bounding_box: + max_dim = dims[0] if dims[0] > dims[1] else dims[1] + return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim)), device=self.device) + + (level, is_exact_level) = self.curated_best_level_for_downsample(target_sampling_factor) + + # check if to get the existing level + if is_exact_level: + backend: Backend = self.read_region((x, y), level, dims) + return self.__class__.backend_to_array(self.backend_rgba2rgb(backend), device=self.device) \ + if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend, device=self.device) + # scale down the thumb img from the next high level + else: + return self.resize_tile_downward(target_sampling_factor, level, win_size=2048, **read_region_kwargs) + + @staticmethod + def parse_wsi_handles(handle_list: str | List[str], delimiter: str, + wsi_handle_dict: Dict[str, Tuple[str, str]]) -> Tuple[List[str], List[str]]: + if isinstance(handle_list, str): + handle_list = handle_list.split(delimiter) + module_list = [] + attr_list = [] + for handle_type in handle_list: + handle_type = handle_type.strip() + if handle_type not in wsi_handle_dict: + msg = f"WSIImageHandle: \"{handle_type}\" is not a registered handle" + logging.warning(msg) + continue + result = wsi_handle_dict[handle_type] + assert len(result) == 2, f"{result}" + module, attr = wsi_handle_dict[handle_type] + module_list.append(module) + attr_list.append(attr) + return module_list, attr_list + + @classmethod + def __create_handle(cls, fname: str, + handle_class_list: List[Callable[[str, Optional[int], Optional[int]], "WSIImageHandle"]], + device_id: Optional[int], + num_threads: Optional[int]) -> "WSIImageHandle": + image_handle = None + assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file" + for handle_class in handle_class_list: + # noinspection PyBroadException + try: + image_handle = handle_class(fname, device_id, num_threads) + break + except Exception: + # current wsi handle class doesn't support this file + msg = f"WSIImageHandle: \"{handle_class}\" doesn't support {fname}" + logging.warning(msg) + continue + if image_handle is None: + # error: no handles support this file + msg = f"WSIImageHandle: can't find the support wsi handles - {fname}" + logging.error(msg) + raise NotImplementedError(msg) + return image_handle + + @classmethod + @final + def build_handle(cls, fname: str, handles: str, device_id: Optional[int], + num_threads: Optional[int]) -> "WSIImageHandle": + # get handles list + module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES) + handle_class_list = dynamic_import(module_list, attr_list, return_first=False) + 
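+        # e.g. handles = "cucim,openslide" resolves to [CuImageHandle, OpenSlideHandle], tried in that order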
+
+    def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int]):
+        self.fname = fname
+        self._device = Device(self.device_type, device_id)
+        self._adapter = ArrayAdapter.build(input_device=self._device, output_device=self._device,
+                                           contingent_device=self._device)
+        self._num_threads = num_threads if num_threads is not None else 1
+
+    @abstractmethod
+    def close_handle(self):
+        ...
+
+    def close(self):
+        self.close_handle()
+        self.handle = None
+
+    def is_closed(self):
+        return not hasattr(self, "handle") or self.handle is None
+
+    @property
+    @abstractmethod
+    def device_type(self) -> ArrayDeviceType:
+        raise NotImplementedError
+
+    @property
+    def adapter(self) -> ArrayAdapter:
+        return self._adapter
+
+    @abstractmethod
+    def release(self):
+        ...
diff --git a/histoqc/wsi_handles/constants.py b/histoqc/wsi_handles/constants.py
new file mode 100644
index 0000000..9fcec88
--- /dev/null
+++ b/histoqc/wsi_handles/constants.py
@@ -0,0 +1,20 @@
+from typing import Dict, Tuple
+from typing_extensions import Literal, get_args
+
+TYPE_OPENSLIDE = Literal["openslide"]
+KEY_OPENSLIDE: str = get_args(TYPE_OPENSLIDE)[0]
+MODULE_OPENSLIDE: str = "histoqc.wsi_handles.openslide_handle"
+CLASS_OPENSLIDE: str = "OpenSlideHandle"
+
+TYPE_CUCIM = Literal["cucim"]
+KEY_CUCIM: str = get_args(TYPE_CUCIM)[0]
+MODULE_CUCIM: str = "histoqc.wsi_handles.cuimage_handle"
+CLASS_CUCIM: str = "CuImageHandle"
+
+WSI_HANDLES: Dict[str, Tuple[str, str]] = {
+    KEY_OPENSLIDE: (MODULE_OPENSLIDE, CLASS_OPENSLIDE),
+    # todo: add unified interface
+    KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM),
+}
+
+HANDLE_DELIMITER = ','
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
new file mode 100644
index 0000000..f331586
--- /dev/null
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -0,0 +1,279 @@
+from __future__ import annotations
+
+from PIL.Image import Image as PILImage
+from cucim.clara import CuImage
+from .base import WSIImageHandle
+import traceback
+from PIL import Image
+from ..import_wrapper.openslide import openslide
+import cupy as cp
+from typing import List, Union, Tuple, Mapping, Optional
+from typing import cast
+from lazy_property import LazyProperty
+import numpy as np
+from cucim import skimage as c_skimage
+from histoqc.array_adapter import ArrayDeviceType, Device
+from types import MappingProxyType
+import logging
+
+
+DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA)
+
+
+class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
+
+    # todo: implement GPU-accelerated LANCZOS filter
+    USE_LANCZOS: bool = False
+
+    handle: Optional[CuImage]
+    fname: str
+    _associated_images: Optional[Mapping]
+
+    # TODO: standalone parser of vendor information
+    dummy_handle_spill: Optional[openslide.OpenSlide]
+
+    def backend_rgba2rgb(self, img: CuImage) -> CuImage:
+        # todo: CuImage does not appear to handle the alpha channel at all; pass through for now
+        return img
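+
+    # Note (assumption from the call sites above): best_thumb and resize_tile_downward
+    # only call backend_rgba2rgb when np.shape(region)[-1] == 4, so an RGBA CuImage
+    # currently passes through unchanged, alpha channel included.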
+
+    @classmethod
+    def validate_device(cls, device: Optional[Device]):
+        device = DEFAULT_DEVICE if device is None else device
+        assert isinstance(device, Device)
+        return device
+
+    @classmethod
+    def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device: Optional[Device]) -> cp.ndarray:
+        device = cls.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            w, h, *_ = new_size_wh
+            arr = cp.array(data)
+            return c_skimage.transform.resize(arr, output_shape=(h, w), order=3, anti_aliasing=True)
+
+    def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int] = 2):
+        super().__init__(fname, device_id, num_threads)
+        self._associated_images = None
+        self.handle = CuImage(fname)
+        # todo: the OpenSlide handle below is only created for parsing the image header/metadata,
+        # todo  as CuCIM v24.02 does not have a native unified metadata interface for different
+        # todo  vendors; it also serves as a CPU (memory-spilling) fallback.
+        self.dummy_handle_spill = openslide.OpenSlide(fname)
+        logging.info(f"{__name__}: {fname}: Created CuImageHandle on device {device_id} ({self.device})."
+                     f" Corresponding CUDA device PCI bus ID: {cp.cuda.runtime.deviceGetPCIBusId(device_id)}")
+
+    @LazyProperty
+    def background_color(self):
+        return f"#{self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+
+    @LazyProperty
+    def bounding_box(self):
+        dim_width, dim_height = self.dimensions
+        x = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0))
+        y = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0))
+        width = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width))
+        height = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height))
+        return x, y, width, height
+
+    @LazyProperty
+    def has_bounding_box(self):
+        return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_Y in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle_spill.properties
+                )
+
+    @LazyProperty
+    def dimensions(self):
+        # cucim stores the shape as (height, width, ...); flip to (width, height)
+        return tuple(self.handle.metadata['cucim']['shape'][:2][::-1])
+
+    @LazyProperty
+    def magnification(self):
+        return self.dummy_handle_spill.properties.get("openslide.objective-power") or \
+               self.dummy_handle_spill.properties.get("aperio.AppMag")
+
+    @property
+    def level_count(self):
+        return self.handle.metadata['cucim']['resolutions']['level_count']
+
+    @property
+    def level_dimensions(self):
+        return self.handle.metadata['cucim']['resolutions']['level_dimensions']
+
+    @property
+    def level_downsamples(self):
+        return self.handle.metadata['cucim']['resolutions']['level_downsamples']
+
+    @property
+    def vendor(self):
+        return self.dummy_handle_spill.properties.get("openslide.vendor", "NA")
+
+    @property
+    def mpp_x(self):
+        return self.dummy_handle_spill.properties.get("openslide.mpp-x", "NA")
+
+    @property
+    def mpp_y(self):
+        return self.dummy_handle_spill.properties.get("openslide.mpp-y", "NA")
+
+    @property
+    def comment(self):
+        return self.dummy_handle_spill.properties.get("openslide.comment", "NA")
+
+    @staticmethod
+    def _resize_osh(thumb_cp: cp.ndarray, width: int, height: int) -> cp.ndarray:
+        # round-trip through PIL on the host to use the LANCZOS filter
+        resized_pil = Image.fromarray(thumb_cp.get()).convert("RGB").resize(
+            (width, height), resample=Image.Resampling.LANCZOS)
+        return c_skimage.util.img_as_ubyte(cp.array(resized_pil, copy=False), force_copy=False)
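+
+    # Trade-off sketch: _resize_osh above copies to the host for PIL's LANCZOS filter
+    # (higher quality, one device->host->device round trip), while _resize_skimage below
+    # stays on the GPU but uses order=0 (nearest-neighbour) interpolation.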
+    @staticmethod
+    def _resize_skimage(thumb_cp: cp.ndarray, width: int, height: int) -> cp.ndarray:
+        resized = c_skimage.transform.resize(thumb_cp, output_shape=(height, width), order=0)
+        return c_skimage.util.img_as_ubyte(resized)
+
+    def get_thumbnail(self, new_dim) -> cp.ndarray:
+        """Get a thumbnail whose size is bounded by new_dim.
+
+        Args:
+            new_dim: (width, height) upper bound of the thumbnail.
+
+        Returns:
+            The thumbnail as a uint8 CuPy array.
+        """
+        # mirrors openslide's get_thumbnail logic
+        with cp.cuda.Device(self.device.device_id):
+            downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim)))
+            level = self.get_best_level_for_downsample(downsample)
+            target_w, target_h = (x // int(downsample) for x in self.dimensions)
+
+            thumb = self.region_backend(level=level)
+            thumb_cp: cp.ndarray = cp.array(thumb, copy=False)
+            try:
+                if CuImageHandle.USE_LANCZOS:
+                    return CuImageHandle._resize_osh(thumb_cp, target_w, target_h)
+                else:
+                    return CuImageHandle._resize_skimage(thumb_cp, target_w, target_h)
+            except Exception:
+                logging.error(f"{__name__} - {self.fname}: resize failed (likely OOM) on device"
+                              f" {self.device.device_id}."
+                              f" Traceback: {traceback.format_exc()}. Falling back to the CPU method...")
+                resized_pil = self.dummy_handle_spill.get_thumbnail(new_dim).convert("RGB")
+                return cp.array(resized_pil, copy=False)
+
+    def get_best_level_for_downsample(self, down_factor: float) -> int:
+        """Return the highest level whose downsample factor does not exceed the target,
+        consistent with openslide's behavior.
+
+        Args:
+            down_factor: the target downsample factor.
+
+        Returns:
+            The index of the best matching level.
+        """
+        level_downsamples_arr = np.asarray(self.level_downsamples)
+        # keep only levels not exceeding the target downsample factor
+        down_indices = np.where(level_downsamples_arr <= down_factor)[0]
+        down_values = level_downsamples_arr[down_indices]
+        # among those, pick the level with the largest (closest) downsample factor
+        return cast(int, down_indices[down_values.argmax()])
+
+    def region_backend(self, location=None, level=None, size=None, **kwargs) -> CuImage:
+        assert level is not None
+        with cp.cuda.Device(self.device.device_id):
+            if location is not None and size is not None:
+                return self.handle.read_region(location=location, level=level, size=size,
+                                               num_workers=self._num_threads,
+                                               **kwargs)
+            # with no location/size, read the whole level
+            assert location is None and size is None
+            return self.handle.read_region(level=level, num_workers=self._num_threads, **kwargs)
+
+    @staticmethod
+    def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray:
+        device = CuImageHandle.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            result = cp.array(region, copy=False)
+        logging.debug(f"{__name__} - {device.device_id} == {result.device}")
+        return result
+
+    @staticmethod
+    def array_to_numpy(arr: cp.ndarray) -> np.ndarray:
+        return arr.get()
+
+    @classmethod
+    def backend_to_pil(cls, region: CuImage) -> PILImage:
+        return Image.fromarray(cls.backend_to_array(region, None).get())
+
+    def read_label(self) -> CuImage:
+        return self.handle.associated_image("label")
+
+    def read_macro(self) -> CuImage:
+        return self.handle.associated_image("macro")
+
+    @classmethod
+    def new_associated_images(cls, handle: CuImage) -> Mapping:
+        if handle is None or not hasattr(handle, "associated_images"):
+            return MappingProxyType(dict())
+        keys = handle.associated_images
+        return MappingProxyType({k: handle.associated_image(k) for k in keys})
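+
+    # The mapping is built eagerly and returned read-only (MappingProxyType); the cached
+    # snapshot below stays in place until clear_associated_images() resets it, e.g. when
+    # the underlying handle is closed.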
+    @property
+    def associated_images(self) -> Mapping:
+        handle = getattr(self, "handle", None)
+        if not hasattr(self, "_associated_images") or self._associated_images is None:
+            self._associated_images = self.__class__.new_associated_images(handle)
+        return self._associated_images
+
+    def clear_associated_images(self):
+        self._associated_images = None
+
+    @staticmethod
+    def grid_stack(grid: List[List[cp.ndarray]], device: Optional[Device]):
+        device = CuImageHandle.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1)
+
+    @staticmethod
+    def backend_dim(region: CuImage) -> Tuple[int, int]:
+        return cast(Tuple[int, int], tuple(region.size()[:2][::-1]))
+
+    @staticmethod
+    def array_shape(arr: cp.ndarray) -> Tuple[int, ...]:
+        return arr.shape
+
+    def release(self):
+        # todo: what's the better practice? This frees every block in both pools and can
+        # todo  lock up the kernel for a couple of seconds.
+        cp.get_default_memory_pool().free_all_blocks()
+        cp.get_default_pinned_memory_pool().free_all_blocks()
+
+    def close_handle(self):
+        logging.debug(f"{__name__}: {self.fname} - closed")
+        if hasattr(self, "handle") and self.handle is not None:
+            self.handle.close()
+            self.handle = None
+        if self.dummy_handle_spill is not None:
+            self.dummy_handle_spill.close()
+            self.dummy_handle_spill = None
+        # clear the cached associated-image map
+        self.clear_associated_images()
+
+    def reload(self):
+        self.release()
+        self.handle = CuImage(self.fname)
+
+    @property
+    def device_type(self) -> ArrayDeviceType:
+        return ArrayDeviceType.CUDA
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
new file mode 100644
index 0000000..435d53e
--- /dev/null
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -0,0 +1,170 @@
+import PIL.Image
+import numpy as np
+
+from .base import WSIImageHandle
+from histoqc.import_wrapper.openslide import openslide
+from typing import Union, Tuple, Sequence, List, Mapping, cast, Optional
+from PIL.Image import Image as PILImage
+from .utils import rgba2rgb_pil
+from PIL import Image
+from histoqc.array_adapter import ArrayDeviceType, Device
+import logging
+
+
+class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]):
+    _background_color: str
+    _magnification_factor: str
+    _has_bounding_box: bool
+    fname: str
+    handle: Optional[openslide.OpenSlide]
+
+    @classmethod
+    def sanitize_device(cls, device: Optional[Device]):
+        if device is None:
+            return
+        if device.device_type is not ArrayDeviceType.CPU:
+            logging.warning(f"Expect CPU but got {device.device_type}. No effect. "
+                            f"Check upstream device settings.")
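+
+    # Behavior note: sanitize_device only warns and never raises, so passing e.g. a CUDA
+    # Device to an OpenSlideHandle leaves processing on the CPU and merely logs a warning.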
+
+    def backend_rgba2rgb(self, img) -> PILImage:
+        return rgba2rgb_pil(img, self.background_color)
+
+    def __init__(self, fname: str, device_id: Optional[int] = None, num_threads: Optional[int] = 1):
+        super().__init__(fname, device_id, num_threads=num_threads)
+        self.handle = openslide.OpenSlide(fname)
+        self._has_bounding_box = True
+        self._bounding_box = self.__get_bounding_box()
+
+        # get the magnification factor from the wsi slide
+        self._magnification_factor = self.handle.properties.get("openslide.objective-power") or \
+            self.handle.properties.get("aperio.AppMag")
+
+        # get the background color
+        self._background_color = f"#{self.handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+        logging.info(f"{__name__}: {fname}: Created OpenSlideHandle at {device_id}. {self.device}")
+
+    def __get_bounding_box(self) -> Tuple[int, int, int, int]:
+        (dim_width, dim_height) = self.handle.dimensions
+
+        try:
+            x = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA'))
+            y = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA'))
+            width = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA'))
+            height = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA'))
+            return x, y, width, height
+        # if any attribute is 'NA' and fails the int() cast
+        except ValueError:
+            self._has_bounding_box = False
+            return 0, 0, dim_width, dim_height
+
+    @property
+    def background_color(self):
+        return self._background_color
+
+    @property
+    def has_bounding_box(self) -> bool:
+        return self._has_bounding_box
+
+    @property
+    def bounding_box(self) -> Tuple[int, int, int, int]:
+        return self._bounding_box
+
+    @property
+    def dimensions(self) -> Tuple[int, int]:
+        return self.handle.dimensions
+
+    @property
+    def magnification(self) -> Union[str, None]:
+        return self._magnification_factor
+
+    @property
+    def level_count(self) -> int:
+        return self.handle.level_count
+
+    @property
+    def level_dimensions(self) -> Sequence[Tuple[int, int]]:
+        return self.handle.level_dimensions
+
+    @property
+    def level_downsamples(self):
+        return self.handle.level_downsamples
+
+    @property
+    def vendor(self):
+        return self.handle.properties.get("openslide.vendor", "NA")
+
+    @property
+    def mpp_x(self) -> str:
+        return self.handle.properties.get("openslide.mpp-x", "NA")
+
+    @property
+    def mpp_y(self) -> str:
+        return self.handle.properties.get("openslide.mpp-y", "NA")
+
+    @property
+    def comment(self) -> str:
+        return self.handle.properties.get("openslide.comment", "NA")
+
+    @classmethod
+    def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int], device: Optional[Device]):
+        cls.sanitize_device(device)
+        return np.array(Image.fromarray(data).resize(new_size_wh), copy=False)
+
+    def get_thumbnail(self, new_dim):
+        return self.handle.get_thumbnail(new_dim)
+
+    def get_best_level_for_downsample(self, down_factor):
+        return self.handle.get_best_level_for_downsample(down_factor)
+
+    def region_backend(self, location, level, size, **kwargs):
+        # extra kwargs are accepted for interface parity; openslide's read_region takes none
+        return self.handle.read_region(location, level, size)
+
+    @staticmethod
+    def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage:
+        if isinstance(region, np.ndarray):
+            return PIL.Image.fromarray(region)
+        return region
+
+    @staticmethod
+    def backend_to_array(region: PILImage, device: Optional[Device]) -> np.ndarray:
+        OpenSlideHandle.sanitize_device(device)
+        return np.array(region, copy=False)
+
+    @staticmethod
+    def array_to_numpy(arr: np.ndarray) -> np.ndarray:
+        return np.array(arr)
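+
+    # Copy semantics: backend_to_array passes copy=False to avoid a redundant copy of the
+    # PIL buffer, while array_to_numpy uses np.array's default copying so that callers
+    # receive an independent array.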
+
+    def read_label(self):
+        return self.handle.associated_images["label"]
+
+    def read_macro(self):
+        return self.handle.associated_images["macro"]
+
+    @property
+    def associated_images(self) -> Mapping[str, PILImage]:
+        return self.handle.associated_images
+
+    @staticmethod
+    def grid_stack(grid: List[List[np.ndarray]], device: Optional[Device]) -> np.ndarray:
+        OpenSlideHandle.sanitize_device(device)
+        return np.concatenate([np.concatenate(row, axis=0) for row in grid], axis=1)
+
+    @staticmethod
+    def backend_dim(region: PILImage) -> Tuple[int, int]:
+        return cast(Tuple[int, int], region.size)
+
+    @staticmethod
+    def array_shape(arr: np.ndarray) -> Tuple[int, ...]:
+        return arr.shape
+
+    def close_handle(self):
+        if hasattr(self, "handle") and self.handle is not None:
+            self.handle.close()
+            self.handle = None
+
+    @property
+    def device_type(self) -> ArrayDeviceType:
+        return ArrayDeviceType.CPU
+
+    def release(self):
+        ...
diff --git a/histoqc/wsi_handles/utils.py b/histoqc/wsi_handles/utils.py
new file mode 100644
index 0000000..fcceafc
--- /dev/null
+++ b/histoqc/wsi_handles/utils.py
@@ -0,0 +1,38 @@
+from PIL import Image
+from PIL.Image import Image as PILImage
+from typing import Union, Iterable
+
+
+def hex_to_rgb(hex_color: str):
+    if hex_color.startswith('#'):
+        hex_color = hex_color[1:]
+
+    if len(hex_color) != 6:
+        raise ValueError(f"Invalid hex triplet. Length: {len(hex_color)}")
+
+    rgb_color = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+    return rgb_color
+
+
+def _validate_numerics(data: Iterable[float]):
+    if not isinstance(data, Iterable):
+        return False
+    return all(isinstance(x, (int, float)) for x in data)
+
+
+def validate_color(background_color: Union[str, Iterable[float], float]):
+    # if str -> assume a hex triplet
+    if isinstance(background_color, str):
+        return hex_to_rgb(background_color)
+    # otherwise must be a number, or a sequence of numbers
+    if isinstance(background_color, (int, float)):
+        return background_color
+    assert _validate_numerics(background_color), ("background color must be a hex triplet string, a number,"
+                                                  " or a sequence of numbers")
+    return tuple(x for x in background_color)
+
+
+def rgba2rgb_pil(img: PILImage, background_color) -> PILImage:
+    # composite the RGBA image onto an opaque canvas of the slide's background color
+    thumb = Image.new("RGB", img.size, validate_color(background_color))
+    thumb.paste(img, None, img)
+    return thumb
diff --git a/requirements.txt b/requirements.txt
index 30754ef..0bc584d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ requests~=2.28.1
 Pillow~=9.4.0
 setuptools~=65.6.3
 shapely~=2.0.1
+dask~=2024.1.1
diff --git a/setup.py b/setup.py
index ac20fff..49a6691 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,10 @@
         "version_scheme": "post-release",
    },
    setup_requires=['setuptools_scm'],
+    extras_require={
+        "dicom": ["wsidicom"],
+        "cucim": ["cucim", "cupy", "dask-cuda"],
"dask-cuda"], + }, package_data={ 'histoqc.config': ['*.ini'], 'histoqc.data': data_files,