From 076b959c2bbba89f457f1468c7d458c4d54a9b7f Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Thu, 25 Apr 2024 09:19:08 -0400
Subject: [PATCH 1/6] Deploy OpenSlide handle abstraction to BaseImage and all
 *Module.py. Make CuImage Handle prototype ready for future development.
 Massive code cleaning

---
 .gitignore                                |   6 +
 histoqc/AnnotationModule.py               |  11 +-
 histoqc/BaseImage.py                      | 259 +++++++---------
 histoqc/BasicModule.py                    |  42 +--
 histoqc/BlurDetectionModule.py            |  25 +-
 histoqc/BrightContrastModule.py           |  27 +-
 histoqc/BubbleRegionByRegion.py           | 109 ++++---
 histoqc/ClassificationModule.py           |  40 +--
 histoqc/DeconvolutionModule.py            |  26 +-
 histoqc/HistogramModule.py                |  22 +-
 histoqc/LightDarkModule.py                |  47 ++-
 histoqc/LocalTextureEstimationModule.py   |  18 +-
 histoqc/MorphologyModule.py               |  23 +-
 histoqc/SaveModule.py                     |  54 ++--
 histoqc/TileExtractionModule.py           |  18 +-
 histoqc/__main__.py                       |   7 +-
 histoqc/_pipeline.py                      |   7 +-
 histoqc/_worker.py                        |  10 +-
 histoqc/annotations/annot_collection.py   |   2 +-
 histoqc/annotations/annotation/base.py    |   3 +-
 histoqc/annotations/annotation/geojson.py |   2 +-
 histoqc/config/__main__.py                |   2 +-
 histoqc/import_wrapper/__init__.py        |   1 +
 histoqc/import_wrapper/helper.py          |  89 +++++-
 histoqc/import_wrapper/openslide.py       |   7 +
 histoqc/import_wrapper/typing.py          |   6 -
 histoqc/tests/test_pipeline_cli.py        |   4 +-
 histoqc/tests/test_ui_cli.py              |   2 -
 histoqc/wsi_handles/base.py               | 340 ++++++++++++++++++++++
 histoqc/wsi_handles/constants.py          |  20 ++
 histoqc/wsi_handles/cuimage_handle.py     | 171 +++++++++++
 histoqc/wsi_handles/openslide_handle.py   | 145 +++++++++
 histoqc/wsi_handles/utils.py              |  38 +++
 pyproject.toml                            |   4 +
 setup.py                                  |   4 +
 35 files changed, 1155 insertions(+), 436 deletions(-)
 delete mode 100644 histoqc/import_wrapper/typing.py
 create mode 100644 histoqc/wsi_handles/base.py
 create mode 100644 histoqc/wsi_handles/constants.py
 create mode 100644 histoqc/wsi_handles/cuimage_handle.py
 create mode 100644 histoqc/wsi_handles/openslide_handle.py
 create mode 100644 histoqc/wsi_handles/utils.py

diff --git a/.gitignore b/.gitignore
index 495397d..e9a2dde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ wheels/
 *.egg-info/
 *.egg
 *.whl
+*.log
 .coverage*
 MANIFEST
 histoqc/tests/data/*
@@ -24,4 +25,9 @@ htmlcov/
 
 histoqc/_version.py
 environment.yml
+*.svs
+/histoqc/tests/data/
+.histoqc_pkg_data*
+.fuse_hidden*
+.pytest*
 
diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py
index 323fb1f..67414f6 100644
--- a/histoqc/AnnotationModule.py
+++ b/histoqc/AnnotationModule.py
@@ -1,7 +1,8 @@
 import logging
 from typing import List, Tuple
-from histoqc.BaseImage import printMaskHelper
-from skimage import io, img_as_ubyte
+from histoqc.BaseImage import printMaskHelper, BaseImage
+from skimage import io
+from skimage.util import img_as_ubyte
 import os
 from pathlib import PurePosixPath, Path
 from shapely.geometry import Polygon
@@ -47,13 +48,13 @@ def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollectio
     return np.array(mask)
 
 
-def getParams(s, params):
+def getParams(s: BaseImage, params):
     # read params - format: xml, json; file_path; suffix; 
     ann_format = params.get("format", None)
     file_path = params.get("file_path", None)
     suffix = params.get("suffix", "")
 
-    # try use default value if the params are not provided
+    # try using default value if the params are not provided
     if not ann_format:
         # set default format
         ann_format = "xml"
@@ -73,7 +74,7 @@ def getParams(s, params):
     return ann_format, file_path, suffix
 
 
-def saveAnnotationMask(s, params):
+def saveAnnotationMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetAnnotationMask")
 
     (ann_format, file_path, suffix) = getParams(s, params)
diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py
index 947f7c7..91e1a7e 100644
--- a/histoqc/BaseImage.py
+++ b/histoqc/BaseImage.py
@@ -1,23 +1,50 @@
+from __future__ import annotations
 import logging
 import os
 import numpy as np
-import zlib, dill
+import zlib
+import dill
 from distutils.util import strtobool
-from PIL import Image
 import re
-from typing import Union, Tuple
-#os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically
-from histoqc.import_wrapper.openslide import openslide
-
-# there is no branch reset group in re
-# compatible with the previous definition of valid input: leading zero and leading decimals are supported
+from typing import Union, Tuple, cast
+from histoqc.wsi_handles.base import WSIImageHandle
+from histoqc.wsi_handles.constants import KEY_OPENSLIDE
 _REGEX_MAG = r"^(\d?\.?\d*X?)"
 _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE)
 MAG_NA = None
 
+# todo either document or regularize the fields of BaseImage
+# class BaseImageData(TypedDict):
+#     warnings: List[str]
+#     output: List
+#     filename: str
+#     comments: str
+#     outdir: str
+#     dir: str
+#     # width, height
+#     image_base_size: Tuple[int, int]
+#     enable_bounding_box: bool
+#     image_work_size: str | float
+#     mask_statistics: str
+#     base_mag: Optional[float]
+#     img_mask_use: np.ndarray
+#     img_mask_force: List[str]
+#     completed: List[str]
+#     img_bbox: Tuple[int, int, int, int]
+
 
 class BaseImage(dict):
 
+    __image_handle: WSIImageHandle
+
+    @property
+    def image_handle(self) -> WSIImageHandle:
+        return self.__image_handle
+
+    @image_handle.setter
+    def image_handle(self, image_handle: WSIImageHandle):
+        self.__image_handle = image_handle
+
     def __init__(self, fname, fname_outdir, params):
         dict.__init__(self)
 
@@ -33,27 +60,34 @@ def __init__(self, fname, fname_outdir, params):
         self["outdir"] = fname_outdir
         self["dir"] = os.path.dirname(fname)
 
-        self["os_handle"] = openslide.OpenSlide(fname)
-        self["image_base_size"] = self["os_handle"].dimensions
-        self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box","False"))
-        # check if the bbox if doesn't have bbox set enable_bounding_box to False
+        # get handles from config
+        handles = params.get("handles", KEY_OPENSLIDE)
+        # dynamically load wsi image handle
+        self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles)
+
+        self["image_base_size"] = self.image_handle.dimensions
+        self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box", "False"))
+        # check: if it doesn't have bbox set enable_bounding_box to False
         self.setBBox()
         self.addToPrintList("image_bounding_box", self["img_bbox"])
         self["image_work_size"] = params.get("image_work_size", "1.25x")
         self["mask_statistics"] = params.get("mask_statistics", "relative2mask")
-        
+
         self["base_mag"] = getMag(self, params)
 
         if not self["base_mag"]:
-            logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?")
-            return -1
+            logging.error(
+                f"{self['filename']}: Has unknown or uncalculated base magnification,"
+                f" cannot specify magnification scale! Did you try getMag?")
+            return
 
         self.addToPrintList("base_mag", self["base_mag"])
 
         mask_statistics_types = ["relative2mask", "absolute", "relative2image"]
-        if (self["mask_statistics"] not in mask_statistics_types):
+        if self["mask_statistics"] not in mask_statistics_types:
             logging.error(
-                f"mask_statistic type '{self['mask_statistics']}' is not one of the 3 supported options relative2mask, absolute, relative2image!")
+                f"mask_statistic type '{self['mask_statistics']}'"
+                f" is not one of the 3 supported options relative2mask, absolute, relative2image!")
             exit()
 
         self["img_mask_use"] = np.ones(self.getImgThumb(self["image_work_size"]).shape[0:2], dtype=bool)
@@ -63,37 +97,32 @@ def __init__(self, fname, fname_outdir, params):
 
     def __getitem__(self, key):
         value = super(BaseImage, self).__getitem__(key)
-        if hasattr(self,"in_memory_compression") and  self.in_memory_compression and key.startswith("img"):
+        if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"):
             value = dill.loads(zlib.decompress(value))
         return value
 
     def __setitem__(self, key, value):
-        if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"):
+        if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"):
             value = zlib.compress(dill.dumps(value), level=5)
 
-        return super(BaseImage, self).__setitem__(key,value)
+        return super(BaseImage, self).__setitem__(key, value)
 
     # setbounding box start coordinate and size
     def setBBox(self):
         # add self["img_bbox"] = (x, y, width, heigh)
-        osh = self["os_handle"]
+        image_handle = self.image_handle
         # set default bbox
-        (dim_width, dim_height) = osh.dimensions
+        (dim_width, dim_height) = image_handle.dimensions
         self["img_bbox"] = (0, 0, dim_width, dim_height)
         # try to get bbox if bounding_box is ture
-        if self["enable_bounding_box"]:
-            # try get bbox from os handle properties
-            try:
-                x = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA'))
-                y = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA'))
-                width = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA'))
-                height = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA'))
-                self["img_bbox"] = (x, y, width, height)
-            except:
-                # no bbox info in slide set enable_bounding_box as Flase
-                self["enable_bounding_box"] = False
-                logging.warning(f"{self['filename']}: Bounding Box requested but could not read")
-                self["warnings"].append("Bounding Box requested but could not read")
+
+        # Does WSI have bounding box
+        if self["enable_bounding_box"] and image_handle.has_bounding_box:
+            self["img_bbox"] = image_handle.bounding_box
+        elif self["enable_bounding_box"] and not image_handle.has_bounding_box:
+            self["enable_bounding_box"] = False
+            logging.warning(f"{self['filename']}: Bounding Box requested but could not read")
+            self["warnings"].append("Bounding Box requested but could not read")
 
     def addToPrintList(self, name, val):
         self[name] = val
@@ -101,14 +130,14 @@ def addToPrintList(self, name, val):
 
     # find the next higher level by giving a downsample factor 
     # return (level, isFindCloseLevel)
-    def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: 
-        osh = self["os_handle"]
-        relative_down_factors_idx=[np.isclose(i/downsample_factor,1,atol=.01) for i in osh.level_downsamples]
-        level=np.where(relative_down_factors_idx)[0]
+    def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]:
+        osh = self.image_handle
+        relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in osh.level_downsamples]
+        level = np.where(relative_down_factors_idx)[0]
         if level.size:
-            return (level[0], True)
+            return level[0], True
         else:
-            return (osh.get_best_level_for_downsample(downsample_factor), False)
+            return osh.get_best_level_for_downsample(downsample_factor), False
 
     @staticmethod
     def is_valid_size(size: str):
@@ -133,9 +162,9 @@ def getImgThumb(self, size: str):
         # return the img if it exists
         if key in self:
             return self[key]
-        
+
         # get open slide handle
-        osh = self["os_handle"]
+        image_handle = self.image_handle
 
         # get the size of view on current img - the current size of view by using the bounding box.
         # bounding box could be the size of whole img or read the size from the slide mate data.
@@ -157,11 +186,12 @@ def getImgThumb(self, size: str):
             # magnification
             base_mag = self["base_mag"]
             target_sampling_factor = base_mag / target_mag
-            target_dims = tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int))
-            
+            target_dims = cast(Tuple[int, int],
+                               tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int)))
+
             # generate the thumb img
-            self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
-        
+            self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor)
+
         # the size of the img is number 
         elif size.replace(".", "0", 1).isdigit():
             size = float(size)
@@ -169,30 +199,33 @@ def getImgThumb(self, size: str):
             if size < 1:
                 target_downscaling_factor = size
                 target_sampling_factor = 1 / target_downscaling_factor
-                target_dims = tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int))
-                
+                target_dims = cast(Tuple[int, int],
+                                   tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int)))
+
                 # generate the thumb img
-                self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
+                self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor)
 
             # specifies a desired level of open slide
             elif size < 100:
                 target_level = int(size)
-                if target_level >= osh.level_count:
-                    target_level = osh.level_count - 1
-                    msg = f"Desired Image Level {size+1} does not exist! Instead using level {osh.level_count-1}! Downstream output may not be correct"
-                    logging.error(f"{self['filename']}: {msg}" )
+                if target_level >= image_handle.level_count:
+                    target_level = image_handle.level_count - 1
+                    msg = (f"Desired Image Level {size + 1} does not exist!"
+                           f" Instead using level {image_handle.level_count - 1}! Downstream output may not be correct")
+                    logging.error(f"{self['filename']}: {msg}")
                     self["warnings"].append(msg)
-                size = (tuple((np.array(img_base_size)/osh.level_downsamples[target_level]).astype(int))
+                size = (tuple((np.array(img_base_size) / image_handle.level_downsamples[target_level]).astype(int))
                         if self["enable_bounding_box"]
-                        else osh.level_dimensions[target_level])
+                        else image_handle.level_dimensions[target_level])
                 logging.info(
-                    f"{self['filename']} - \t\tloading image from level {target_level} of size {osh.level_dimensions[target_level]}")
-                tile = osh.read_region((bx, by), target_level, size)                
-                self[key] = (np.asarray(rgba2rgb(self, tile))
-                            if np.shape(tile)[-1]==4 
-                            else np.asarray(tile))        
-       
-            # specifies a desired size of thumbnail
+                    f"{self['filename']} - \t\tloading image from level {target_level} of size"
+                    f" {image_handle.level_dimensions[target_level]}")
+                tile = image_handle.read_region((bx, by), target_level, size)
+                self[key] = (np.asarray(self.image_handle.backend_rgba2rgb(tile))
+                             if np.shape(tile)[-1] == 4
+                             else np.asarray(tile))
+
+                # specifies a desired size of thumbnail
             else:
                 # recommend having the dimension is less than 10k     
                 if size > 10000:
@@ -202,89 +235,19 @@ def getImgThumb(self, size: str):
                     self["warnings"].append(msg)
                 target_dims = getDimensionsByOneDim(self, int(size))
                 target_sampling_factor = img_base_size[0] / target_dims[0]
-                self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
+                self[key] = self.image_handle.best_thumb(bx, by, target_dims, target_sampling_factor)
         return self[key]
 
-def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float):
-    osh = s["os_handle"]
-    
-    # get thumb from og
-    if not s["enable_bounding_box"]:
-        max_dim = dims[0] if dims[0] > dims[1] else dims[1]
-        return np.array(osh.get_thumbnail((max_dim, max_dim)))
-    
-    (level, isExactLevel) = s.getBestLevelForDownsample(target_sampling_factor)
-    
-    # check if get the existing level
-    if isExactLevel:
-        tile = osh.read_region((x, y), level, dims)
-        return np.asarray(rgba2rgb(s, tile)) if np.shape(tile)[-1]==4 else np.asarray(tile)
-    # scale down the thumb img from the next high level
-    else:
-        return resizeTileDownward(s, target_sampling_factor, level)
-        
-'''
-the followings are helper functions 
-'''
-def resizeTileDownward(self, target_downsampling_factor, level):
-    osh = self["os_handle"]
-    (bx, by, bwidth, bheight) = self["img_bbox"]
-    end_x = bx + bwidth
-    end_y = by + bheight
-    
-    cloest_downsampling_factor = osh.level_downsamples[level]
-    win_size = 2048
-    
-    # create a new img
-    output = []
-    for x in range(bx, end_x, win_size):
-        row_piece = []
-        for y in range(by, end_y, win_size):
-            win_width, win_height = [win_size] * 2
-            # Adjust extraction size for endcut
-            if end_x < x + win_width:
-                win_width = end_x - x
-            if end_y < y +  win_height:
-                win_height = end_y - y
-
-            
-            win_down_width = int(round(win_width / target_downsampling_factor))
-            win_down_height = int(round(win_height / target_downsampling_factor))
-            
-            win_width = int(round(win_width / cloest_downsampling_factor))
-            win_height = int(round(win_height / cloest_downsampling_factor))
-            
-            # TODO Note: this isn't very efficient, and if more efficiency isneeded 
-            # We should likely refactor using "paste" from Image.
-            # Or even just set the pixels directly with indexing.
-            cloest_region = osh.read_region((x, y), level, (win_width, win_height))
-            if np.shape(cloest_region)[-1]==4:
-                cloest_region = rgba2rgb(self, cloest_region)
-            target_region = cloest_region.resize((win_down_width, win_down_height))
-            row_piece.append(target_region)
-        row_piece = np.concatenate(row_piece, axis=0)
-        
-        output.append(row_piece)
-    output = np.concatenate(output, axis=1)
-    return output
-
-
-def rgba2rgb(s: BaseImage, img):
-    bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff")
-    thumb = Image.new("RGB", img.size, bg_color)
-    thumb.paste(img, None, img)
-    return thumb
-
-
-def printMaskHelper(type: str, prev_mask, curr_mask):
-    if type == "relative2mask":
+
+def printMaskHelper(statistic_type: str, prev_mask, curr_mask):
+    if statistic_type == "relative2mask":
         if len(prev_mask.nonzero()[0]) == 0:
             return str(-100)
         else:
             return str(1 - len(curr_mask.nonzero()[0]) / len(prev_mask.nonzero()[0]))
-    elif type == "relative2image":
+    elif statistic_type == "relative2image":
         return str(len(curr_mask.nonzero()[0]) / np.prod(curr_mask.shape))
-    elif type == "absolute":
+    elif statistic_type == "absolute":
         return str(len(curr_mask.nonzero()[0]))
     else:
         return str(-1)
@@ -306,29 +269,23 @@ def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]:
         return MAG_NA
     # regex determines X must either be abscent or at the end of the string
     if "X" in mag.upper():
-        mag = mag[0:-1]
+        mag = mag[0: -1]
     return float(mag)
 
 
 # this function is seperated out because in the future we hope to have automatic detection of
 # magnification if not present in open slide, and/or to confirm openslide base magnification
 def getMag(s: BaseImage, params) -> Union[float, None]:
-    logging.info(f"{s['filename']} - \tgetMag")
-    osh = s["os_handle"]
-    mag = osh.properties.get("openslide.objective-power") or \
-            osh.properties.get("aperio.AppMag") or MAG_NA
-    # if mag or strtobool(params.get("confirm_base_mag", "False")):
-    #     # do analysis work here
-    #     logging.warning(f"{s['filename']} - Unknown base magnification for file")
-    #     s["warnings"].append(f"{s['filename']} - Unknown base magnification for file")
-    #     return None
-    # else:
+    osh = s.image_handle
+    mag = osh.magnification or MAG_NA
     # workaround for unspecified mag -- with or without automatic detection it might be preferred to have
     # mag predefined
     mag = mag or parsed_mag(params.get("base_mag"))
     # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise
     # the exception here.
-    return float(mag) if mag is not MAG_NA else MAG_NA
+    mag = float(mag) if mag is not MAG_NA else MAG_NA
+    logging.info(f"{s['filename']} - \tgetMag = {mag}")
+    return mag
 
 
 def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]:
diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py
index 418c715..4d4dbcd 100644
--- a/histoqc/BasicModule.py
+++ b/histoqc/BasicModule.py
@@ -2,30 +2,30 @@
 import os
 from histoqc.BaseImage import printMaskHelper
 from skimage.morphology import remove_small_objects, binary_opening, disk
-from skimage import io, color, img_as_ubyte
+from skimage import io
+from skimage.util import img_as_ubyte
+from histoqc.BaseImage import BaseImage
 
-import matplotlib.pyplot as plt
 
-
-def getBasicStats(s, params):
+def getBasicStats(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetBasicStats")
-    osh = s["os_handle"]
-    s.addToPrintList("type", osh.properties.get("openslide.vendor", "NA"))
-    s.addToPrintList("levels", osh.properties.get("openslide.level-count", "NA"))
-    s.addToPrintList("height", osh.properties.get("openslide.level[0].height", "NA"))
-    s.addToPrintList("width", osh.properties.get("openslide.level[0].width", "NA"))
-    s.addToPrintList("mpp_x", osh.properties.get("openslide.mpp-x", "NA"))
-    s.addToPrintList("mpp_y", osh.properties.get("openslide.mpp-y", "NA"))
-    s.addToPrintList("comment", osh.properties.get("openslide.comment", "NA").replace("\n", " ").replace("\r", " "))
+    osh = s.image_handle
+    s.addToPrintList("type", osh.vendor)
+    s.addToPrintList("levels", osh.level_count)
+    s.addToPrintList("height", osh.dimensions[1] if len(osh.dimensions) >= 2 else "NA")
+    s.addToPrintList("width", osh.dimensions[0] if len(osh.dimensions) >= 2 else "NA")
+    s.addToPrintList("mpp_x", osh.mpp_x)
+    s.addToPrintList("mpp_y", osh.mpp_y)
+    s.addToPrintList("comment", osh.comment.replace("\n", " ").replace("\r", " "))
     return
 
 
-def finalComputations(s, params):
+def finalComputations(s: BaseImage, params):
     mask = s["img_mask_use"]
     s.addToPrintList("pixels_to_use", str(len(mask.nonzero()[0])))
 
 
-def finalProcessingSpur(s, params):
+def finalProcessingSpur(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tfinalProcessingSpur")
     disk_radius = int(params.get("disk_radius", "25"))
     selem = disk(disk_radius)
@@ -43,12 +43,14 @@ def finalProcessingSpur(s, params):
 
     if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
         logging.warning(
-            f"{s['filename']} - After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+            f"{s['filename']} - After BasicModule.finalProcessingSpur"
+            f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(
-            f"After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+            f"After BasicModule.finalProcessingSpur"
+            f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
 
 
-def finalProcessingArea(s, params):
+def finalProcessingArea(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tfinalProcessingArea")
     area_thresh = int(params.get("area_threshold", "1000"))
     mask = s["img_mask_use"]
@@ -66,6 +68,8 @@ def finalProcessingArea(s, params):
 
     if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
         logging.warning(
-            f"{s['filename']} - After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+            f"{s['filename']} - After BasicModule.finalProcessingArea"
+            f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(
-            f"After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+            f"After BasicModule.finalProcessingArea"
+            f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py
index 8c55890..9900780 100644
--- a/histoqc/BlurDetectionModule.py
+++ b/histoqc/BlurDetectionModule.py
@@ -2,21 +2,18 @@
 import os
 
 import skimage
-from histoqc.BaseImage import printMaskHelper
-from skimage import io, img_as_ubyte, morphology, measure
+from histoqc.BaseImage import printMaskHelper, BaseImage
+from skimage import io, morphology, measure
+from skimage.util import img_as_ubyte
 from skimage.color import rgb2gray
-from skimage.filters import rank
 import numpy as np
 
-import matplotlib.pyplot as plt
-
-
 # Analysis of focus measure operators for shape-from-focus
 # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012
 # https://pdfs.semanticscholar.org/8c67/5bf5b542b98bf81dcf70bd869ab52ab8aae9.pdf
 
 
-def identifyBlurryRegions(s, params):
+def identifyBlurryRegions(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tidentifyBlurryRegions")
 
     blur_radius = int(params.get("blur_radius", 7))
@@ -27,8 +24,9 @@ def identifyBlurryRegions(s, params):
     img_laplace = np.abs(skimage.filters.laplace(img))
     mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold
 
-    mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :,
-           1]  # for some reason resize takes a grayscale and produces a 3chan
+    # for some reason resize takes a grayscale and produces a 3chan
+    mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, 1]
+
     mask = s["img_mask_use"] & (mask > 0)
 
     io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask))
@@ -46,20 +44,19 @@ def identifyBlurryRegions(s, params):
     else:
         nobj = area_max = area_mean = 0
 
-
     s.addToPrintList("blurry_removed_num_regions", str(nobj))
     s.addToPrintList("blurry_removed_mean_area", str(area_mean))
     s.addToPrintList("blurry_removed_max_area", str(area_max))
 
-
     s.addToPrintList("blurry_removed_percent",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
 
     if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
         logging.warning(
-            f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+            f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions "
+            f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(
-            f"After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
-
+            f"After BlurDetectionModule.identifyBlurryRegions"
+            f" NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
 
     return
diff --git a/histoqc/BrightContrastModule.py b/histoqc/BrightContrastModule.py
index 89dbfbd..df98aec 100644
--- a/histoqc/BrightContrastModule.py
+++ b/histoqc/BrightContrastModule.py
@@ -3,9 +3,10 @@
 from skimage.filters import sobel
 from skimage.color import convert_colorspace, rgb2gray
 from distutils.util import strtobool
+from histoqc.BaseImage import BaseImage
 
 
-def getBrightnessGray(s, params):
+def getBrightnessGray(s: BaseImage, params):
     prefix = params.get("prefix", None)
     prefix = prefix+"_" if prefix else ""
     logging.info(f"{s['filename']} - \tgetContrast:{prefix}")
@@ -31,7 +32,7 @@ def getBrightnessGray(s, params):
     return
 
 
-def getBrightnessByChannelinColorSpace(s, params):
+def getBrightnessByChannelinColorSpace(s: BaseImage, params):
     prefix = params.get("prefix", None)
     prefix = prefix + "_" if prefix else ""
 
@@ -43,17 +44,16 @@ def getBrightnessByChannelinColorSpace(s, params):
 
     invert = strtobool(params.get("invert", "False"))
 
-
     img = s.getImgThumb(s["image_work_size"])
 
     suffix = ""
-    if (to_color_space != "RGB"):
+    if to_color_space != "RGB":
         img = convert_colorspace(img, "RGB", to_color_space)
         suffix = "_" + to_color_space
 
     for chan in range(0, 3):
         vals = img[:, :, chan]
-        if (limit_to_mask):
+        if limit_to_mask:
 
             mask = s[mask_name] if not invert else ~s[mask_name]
             vals = vals[mask]
@@ -67,7 +67,7 @@ def getBrightnessByChannelinColorSpace(s, params):
     return
 
 
-def getContrast(s, params):
+def getContrast(s: BaseImage, params):
     prefix = params.get("prefix", None)
     prefix = prefix + "_" if prefix else ""
 
@@ -77,10 +77,9 @@ def getContrast(s, params):
 
     invert = strtobool(params.get("invert", "False"))
 
-
     img = s.getImgThumb(s["image_work_size"])
     img = rgb2gray(img)
-
+    # noinspection PyTypeChecker
     sobel_img = sobel(img) ** 2
 
     if limit_to_mask:
@@ -90,14 +89,13 @@ def getContrast(s, params):
         sobel_img = sobel_img[mask]
         img = img[s["img_mask_use"]]
 
-    if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to
-                      # produce metrics for completeness with warning
+    if img.size == 0:  # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to
+        # produce metrics for completeness with warning
 
-        s.addToPrintList(f"{prefix}tenenGrad_contrast", str(-100))
+        s.addToPrintList(f"{prefix}tenen_grad_contrast", str(-100))
         s.addToPrintList(f"{prefix}michelson_contrast", str(-100))
         s.addToPrintList(f"{prefix}rms_contrast", str(-100))
 
-
         logging.warning(f"{s['filename']} - After BrightContrastModule.getContrast: NO tissue "
                         f"detected, statistics are impossible to compute, defaulting to -100 !")
         s["warnings"].append(f"After BrightContrastModule.getContrast: NO tissue remains "
@@ -105,10 +103,9 @@ def getContrast(s, params):
 
         return
 
-
     # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask
-    tenenGrad_contrast = np.sqrt(np.sum(sobel_img)) / img.size
-    s.addToPrintList(f"{prefix}tenenGrad_contrast", str(tenenGrad_contrast))
+    tenen_grad_contrast = np.sqrt(np.sum(sobel_img)) / img.size
+    s.addToPrintList(f"{prefix}tenen_grad_contrast", str(tenen_grad_contrast))
 
     # Michelson contrast
     max_img = img.max()
diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py
index afd62f5..e5d2249 100644
--- a/histoqc/BubbleRegionByRegion.py
+++ b/histoqc/BubbleRegionByRegion.py
@@ -1,50 +1,35 @@
 import logging
 import os
-import sys
 
-from ast import literal_eval as make_tuple
-
-from distutils.util import strtobool
 from histoqc.BaseImage import printMaskHelper
+from scipy.signal import convolve2d
 
-import scipy.signal
-
-from skimage import io, img_as_ubyte
-from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace
+from skimage.util import img_as_ubyte
+from skimage.filters import frangi
 from skimage.color import rgb2gray
-from skimage.morphology import remove_small_objects, disk, binary_opening
-from skimage.feature import local_binary_pattern
-
-from skimage.transform import rescale, resize, downscale_local_mean
-
-from math import ceil
-
-from sklearn.naive_bayes import GaussianNB
-from sklearn.ensemble import RandomForestClassifier
-
+from skimage.morphology import remove_small_objects
+from histoqc.BaseImage import BaseImage
 from skimage import io, color
-
-
 import numpy as np
 
-import matplotlib.pyplot as plt
 
 global_holder = {}
 
-#WARNING: Not as robust as other modules
-def roiWise(s, params):
+
+# WARNING: Not as robust as other modules
+def roiWise(s: BaseImage, params):
     name = params.get("name", "classTask")
     print("\tpixelWise:\t", name, end="")
 
     level = int(params.get("level", 1))
-    win_size = int(params.get("win_size", 2048)) #the size of the ROI which will be iteratively considered
+    win_size = int(params.get("win_size", 2048))  # the size of the ROI which will be iteratively considered
 
-    osh = s["os_handle"]
+    osh = s.image_handle
 
     dim_base = osh.level_dimensions[0]
     dims = osh.level_dimensions[level]
 
-    ratio_x = dim_base[0] / dims[0] #figure out the difference between desi
+    ratio_x = dim_base[0] / dims[0]  # figure out the difference between desi
     ratio_y = dim_base[1] / dims[1]
 
     frangi_scale_range = (1, 6)
@@ -58,13 +43,15 @@ def roiWise(s, params):
         row_piece = []
         print('.', end='', flush=True)
         for y in range(0, dim_base[1], round(win_size * ratio_y)):
-            region = np.asarray(osh.read_region((x, y), 1, (win_size, win_size)))
+            region = osh.read_region((x, y), 1, (win_size, win_size))
+            region = np.array(region)
             region = region[:, :, 0:3]  # remove alpha channel
             g = rgb2gray(region)
+            # todo -- forward compatibility. Later version of frangi alters the signatures
             feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges)
             feat = feat / 8.875854409275627e-08
             region_mask = np.bitwise_and(g < .3, feat > 5)
-            region_mask = remove_small_objects(region_mask, min_size=100, in_place=True)
+            region_mask = remove_small_objects(region_mask, min_size=100)
             # region_std = region.std(axis=2)
             # region_gray = rgb2gray(region)
             # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255)
@@ -77,52 +64,54 @@ def roiWise(s, params):
     mask = np.concatenate(mask, axis=1)
 
     if params.get("area_threshold", "") != "":
-        mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), in_place=True)
+        # forward compatibility
+        # inplace=True is equivalent to out=mask. Therefore, it is removed in future version
+        mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), out=mask)
 
     s.addToPrintList(name, str(mask.mean()))
 
-    #TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask
-    #s.addToPrintList(name,
-    #                 printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+    # TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask
+    # s.addToPrintList(name,
+    #                  printMaskHelper(params.get("mask_statistics", s["mask_statistics"]),
+    #                                  prev_mask, s["img_mask_use"]))
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) #.astype(np.uint8) * 255)
+    # .astype(np.uint8) * 255)
+    io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask))
 
     return
 
 
-def detectSmoothness(s, params):
-        logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness")
-        thresh = float(params.get("threshold", ".01" ))
-        kernel_size = int(params.get("kernel_size", "10"))
-        min_object_size = int(params.get("min_object_size", "100"))
+def detectSmoothness(s: BaseImage, params):
+    logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness")
+    thresh = float(params.get("threshold", ".01"))
+    kernel_size = int(params.get("kernel_size", "10"))
+    min_object_size = int(params.get("min_object_size", "100"))
+    img = s.getImgThumb(s["image_work_size"])
+    img = color.rgb2gray(img)
+    avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2)
 
-        img = s.getImgThumb(s["image_work_size"])
-        img = color.rgb2gray(img)
-        avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2)
+    imf = convolve2d(img, avg, mode="same")
+    mask_flat = abs(imf - img) < thresh
 
-        imf = scipy.signal.convolve2d(img, avg, mode="same")
-        mask_flat = abs(imf - img) < thresh
+    mask_flat = remove_small_objects(mask_flat, min_size=min_object_size)
+    mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size)
 
-        mask_flat = remove_small_objects(mask_flat, min_size=min_object_size)
-        mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size)
+    prev_mask = s["img_mask_use"]
+    s["img_mask_flat"] = mask_flat
 
-        prev_mask = s["img_mask_use"]
-        s["img_mask_flat"] = mask_flat
+    io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask))
 
-        io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask))
+    s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"]
 
-        s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"]
+    s.addToPrintList("flat_areas",
+                     printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask,
+                                     s["img_mask_use"]))
 
+    if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
+        logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue "
+                        f"remains detectable! Downstream modules likely to be incorrect/fail")
+        s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains "
+                             f"detectable! Downstream modules likely to be incorrect/fail")
 
-        s.addToPrintList("flat_areas",
-                         printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask,
-                                         s["img_mask_use"]))
-
-        if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
-            logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue "
-                            f"remains detectable! Downstream modules likely to be incorrect/fail")
-            s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains "
-                                 f"detectable! Downstream modules likely to be incorrect/fail")
-
-        return
+    return
 
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index 9567a49..fdbe039 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -7,8 +7,9 @@
 
 from distutils.util import strtobool
 
-from histoqc.BaseImage import printMaskHelper
-from skimage import io, img_as_ubyte, img_as_bool
+from histoqc.BaseImage import printMaskHelper, BaseImage
+from skimage import io
+from skimage.util import img_as_ubyte, img_as_bool
 from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace
 from skimage.color import rgb2gray
 from skimage.morphology import remove_small_objects, disk, dilation
@@ -21,10 +22,8 @@
 
 import numpy as np
 
-import matplotlib.pyplot as plt
 
-
-def pixelWise(s, params):
+def pixelWise(s: BaseImage, params):
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tpixelWise:\t", name)
 
@@ -34,7 +33,7 @@ def pixelWise(s, params):
     if fname == "":
         logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name)
         sys.exit(1)
-        return
+
     model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1)
 
     img = s.getImgThumb(s["image_work_size"])
@@ -90,15 +89,19 @@ def compute_gaussian(img, params):
     gaussian_sigma = int(params.get("gaussian_sigma", 1))
     gaussian_multichan = strtobool(params.get("gaussian_multichan", False))
 
-    if (gaussian_multichan):
-        return gaussian(img, sigma=gaussian_sigma, multichannel=gaussian_multichan)
+    # todo: forward compatibility
+    # todo: after 0.19 default multichannel behavior is fixed and explicitly setting channel_axis is preferred.
+    # todo: multichannel is also deprecated in later versions
+    if gaussian_multichan:
+        return gaussian(img, sigma=gaussian_sigma, channel_axis=-1)
     else:
         return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None]
 
 
 def compute_median(img, params):
     median_disk_size = int(params.get("median_disk_size", 3))
-    return median(rgb2gray(img), selem=disk(median_disk_size))[:, :, None]
+    # starting from 0.19, selem is deprecated and footprint is preferred.
+    return median(rgb2gray(img), footprint=disk(median_disk_size))[:, :, None]
 
 
 def compute_gabor(img, params):
@@ -132,7 +135,8 @@ def compute_frangi(img, params):
     frangi_beta1 = float(params.get("frangi_beta1", .5))
     frangi_beta2 = float(params.get("frangi_beta2", 15))
     frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True"))
-    feat = frangi(rgb2gray(img), scale_range = frangi_scale_range, scale_step =frangi_scale_step, beta =frangi_beta1, gamma=frangi_beta2, black_ridges  =frangi_black_ridges)
+    sigmas = frangi_scale_range + (frangi_scale_step,)
+    feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, black_ridges=frangi_black_ridges)
     return feat[:, :, None]  # add singleton dimension
 
 
@@ -147,7 +151,7 @@ def compute_features(img, params):
     return np.concatenate(feats, axis=2)
 
 
-def byExampleWithFeatures(s, params):
+def byExampleWithFeatures(s: BaseImage, params):
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
 
@@ -158,12 +162,10 @@ def byExampleWithFeatures(s, params):
     if examples == "":
         logging.error(f"{s['filename']} - No examples provided in ClassificationModule.byExample for {name} !!")
         sys.exit(1)
-        return
 
     if params.get("features", "") == "":
         logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!")
         sys.exit(1)
-        return
 
     with params["lock"]:  # this lock is shared across all threads such that only one thread needs to train the model
         # then it is shared with all other modules
@@ -192,13 +194,13 @@ def byExampleWithFeatures(s, params):
                 
                 mask = mask.reshape(-1, 1)
 
-                if nsamples_per_example != -1: #sub sambling required
-                    nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]*nsamples_per_example)
+                if nsamples_per_example != -1:  # sub sampling required
+                    nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]
+                                                                                       * nsamples_per_example)
                     idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
                     eximg = eximg[idxkeep, :]
                     mask = mask[idxkeep]
 
-
                 model_vals.append(eximg)
                 model_labels = np.vstack((model_labels, mask))
 
@@ -216,14 +218,14 @@ def byExampleWithFeatures(s, params):
     cal = cal.reshape(img.shape[0], img.shape[1], 2)
 
     mask = cal[:, :, 1] > thresh
-
     area_thresh = int(params.get("area_threshold", "5"))
     if area_thresh > 0:
-        mask = remove_small_objects(mask, min_size=area_thresh, in_place=True)
+        # inplace=True is redundant and deprecated.
+        mask = remove_small_objects(mask, min_size=area_thresh, out=mask)
 
     dilate_kernel_size = int(params.get("dilate_kernel_size", "0"))
     if dilate_kernel_size > 0:
-        mask = dilation(mask, selem=np.ones((dilate_kernel_size, dilate_kernel_size)))
+        mask = dilation(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size)))
 
     mask = s["img_mask_use"] & (mask > 0)
 
diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py
index dbb41f1..bff440b 100644
--- a/histoqc/DeconvolutionModule.py
+++ b/histoqc/DeconvolutionModule.py
@@ -2,18 +2,15 @@
 import os
 import sys
 import numpy as np
-from skimage import io, color, img_as_ubyte
-from skimage.exposure import rescale_intensity
+from skimage import io
+from skimage.util import img_as_ubyte
+from histoqc.BaseImage import BaseImage
 from skimage.color import separate_stains
-from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb
-from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \
-    hpx_from_rgb  # need to load all of these in case the user selects them
 from distutils.util import strtobool
+from histoqc.import_wrapper import dynamic_import
 
-import matplotlib.pyplot as plt
 
-
-def separateStains(s, params):
+def separateStains(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tseparateStains")
     stain = params.get("stain", "")
     use_mask = strtobool(params.get("use_mask", "True"))
@@ -21,25 +18,23 @@ def separateStains(s, params):
     if stain == "":
         logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains")
         sys.exit(1)
-        return
-
-    stain_matrix = getattr(sys.modules[__name__], stain, None)
 
-    if stain_matrix is None:
+    try:
+        stain_matrix = dynamic_import("skimage.color", stain, return_first=True)
+    except ImportError:
         logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains")
         sys.exit(1)
-        return
 
     mask = s["img_mask_use"]
 
-    if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early
+    if use_mask and len(mask.nonzero()[0]) == 0:  # -- lets just error check at the top if mask is empty and abort early
         for c in range(3):
             s.addToPrintList(f"deconv_c{c}_std", str(-100))
             s.addToPrintList(f"deconv_c{c}_mean", str(-100))
             io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape)))
 
         logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue "
-                             f"remains detectable! Saving Black images")
+                        f"remains detectable! Saving Black images")
         s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue "
                              f"remains detectable! Saving Black images")
 
@@ -54,7 +49,6 @@ def separateStains(s, params):
         clip_max_val = np.quantile(dc.flatten(), .99)
         dc = np.clip(dc, a_min=0, a_max=clip_max_val)
 
-
         if use_mask:
             dc_sub = dc[mask]
             dc_min = dc_sub.min()
diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py
index e58707c..a57122f 100644
--- a/histoqc/HistogramModule.py
+++ b/histoqc/HistogramModule.py
@@ -4,11 +4,12 @@
 from skimage import io
 import matplotlib.pyplot as plt
 from distutils.util import strtobool
+from histoqc.BaseImage import BaseImage
+# this holds a local copy of the histograms of the template images so that they need only be computed once
+global_holder = {}
 
-global_holder = {} #this holds a local copy of the histograms of the template images so that they need only be computed once
 
-
-def getHistogram(s, params):
+def getHistogram(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetHistogram")
     limit_to_mask = strtobool(params.get("limit_to_mask", True))
     bins = int(params.get("bins", 20))
@@ -35,19 +36,20 @@ def computeHistogram(img, bins, mask=-1):
     result = np.zeros(shape=(bins, 3))
     for chan in range(0, 3):
         vals = img[:, :, chan].flatten()
-        if (isinstance(mask, np.ndarray)):
+        if isinstance(mask, np.ndarray):
             vals = vals[mask.flatten()]
 
-        result[:, chan] = np.histogram(vals, bins=bins, density=True, range=[0, 255])[0]
+        result[:, chan] = np.histogram(vals, bins=bins, density=True, range=(0, 255))[0]
 
     return result
 
 
-def compareToTemplates(s, params):
+def compareToTemplates(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tcompareToTemplates")
     bins = int(params.get("bins", 20))
     limit_to_mask = strtobool(params.get("limit_to_mask", True))
-    if (not global_holder.get("templates", False)): #if the histograms haven't already been computed, compute and store them now
+    # if the histograms haven't already been computed, compute and store them now
+    if not global_holder.get("templates", False):
         templates = {}
         for template in params["templates"].splitlines():
             templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins)
@@ -56,16 +58,16 @@ def compareToTemplates(s, params):
 
     img = s.getImgThumb(s["image_work_size"])
 
-    if (limit_to_mask):
+    if limit_to_mask:
         mask = s["img_mask_use"]
         if len(mask.nonzero()[0]) == 0:
 
             logging.warning(f"{s['filename']} - HistogramModule.compareToTemplates NO tissue "
                             f"remains detectable in mask!")
             s["warnings"].append(f"HistogramModule.compareToTemplates NO tissue "
-                                        f"remains detectable in mask!")
+                                 f"remains detectable in mask!")
 
-            imghst = np.zeros((bins,3))
+            imghst = np.zeros((bins, 3))
 
         else:
             imghst = computeHistogram(img, bins, mask)
diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py
index 1f8d306..cf3290a 100644
--- a/histoqc/LightDarkModule.py
+++ b/histoqc/LightDarkModule.py
@@ -1,23 +1,21 @@
 import logging
 import os
 import numpy as np
-from histoqc.BaseImage import printMaskHelper
-from skimage import io, color, img_as_ubyte
+from histoqc.BaseImage import printMaskHelper, BaseImage
+from skimage import io, color
+from skimage.util import img_as_ubyte
 from distutils.util import strtobool
 from skimage.filters import threshold_otsu, rank
 from skimage.morphology import disk
 from sklearn.cluster import KMeans
 from skimage import exposure
 
-import matplotlib.pyplot as plt
 
-
-
-def getIntensityThresholdOtsu(s, params):
+def getIntensityThresholdOtsu(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu")
     name = params.get("name", "classTask")    
     local = strtobool(params.get("local", "False"))
-    radius = float(params.get("radius", 15))
+    radius = int(params.get("radius", 15))
     selem = disk(radius)
 
     img = s.getImgThumb(s["image_work_size"])
@@ -28,9 +26,9 @@ def getIntensityThresholdOtsu(s, params):
     else:
         thresh = threshold_otsu(img)
 
-    map = img < thresh
+    region_below_thresh = img < thresh
 
-    s["img_mask_" + name] = map > 0
+    s["img_mask_" + name] = region_below_thresh > 0
     if strtobool(params.get("invert", "False")):
         s["img_mask_" + name] = ~s["img_mask_" + name]
 
@@ -51,10 +49,9 @@ def getIntensityThresholdOtsu(s, params):
     return
 
 
-def getIntensityThresholdPercent(s, params):
+def getIntensityThresholdPercent(s: BaseImage, params):
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}")
-
     lower_thresh = float(params.get("lower_threshold", "-inf"))
     upper_thresh = float(params.get("upper_threshold", "inf"))
 
@@ -70,13 +67,11 @@ def getIntensityThresholdPercent(s, params):
     map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std)
 
     img = color.rgb2gray(img)
-    map = np.bitwise_and(img > lower_thresh, img < upper_thresh)
-
-    map = np.bitwise_and(map, map_std)
-
-    s["img_mask_" + name] = map > 0
+    region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh)
 
+    region_between_interval = np.bitwise_and(region_between_interval, map_std)
 
+    s["img_mask_" + name] = region_between_interval > 0
 
     if strtobool(params.get("invert", "False")):
         s["img_mask_" + name] = ~s["img_mask_" + name]
@@ -84,7 +79,8 @@ def getIntensityThresholdPercent(s, params):
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name]
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(prev_mask & ~s["img_mask_" + name]))
+    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png",
+              img_as_ubyte(prev_mask & ~s["img_mask_" + name]))
 
     s.addToPrintList(name,
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -98,11 +94,7 @@ def getIntensityThresholdPercent(s, params):
     return
 
 
-
-
-
-
-def removeBrightestPixels(s, params):
+def removeBrightestPixels(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels")
 
     # lower_thresh = float(params.get("lower_threshold", -float("inf")))
@@ -115,13 +107,13 @@ def removeBrightestPixels(s, params):
     img = color.rgb2gray(img)
 
     kmeans = KMeans(n_clusters=3,  n_init=1).fit(img.reshape([-1, 1]))
+    # noinspection PyUnresolvedReferences
     brightest_cluster = np.argmax(kmeans.cluster_centers_)
+    # noinspection PyUnresolvedReferences
     darkest_point_in_brightest_cluster = (img.reshape([-1, 1])[kmeans.labels_ == brightest_cluster]).min()
 
     s["img_mask_bright"] = img > darkest_point_in_brightest_cluster
 
-
-
     if strtobool(params.get("invert", "False")):
         s["img_mask_bright"] = ~s["img_mask_bright"]
 
@@ -142,8 +134,7 @@ def removeBrightestPixels(s, params):
     return
 
 
-
-def minimumPixelIntensityNeighborhoodFiltering(s,params):
+def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering")
     disk_size = int(params.get("disk_size", 10000))
     threshold = int(params.get("upper_threshold", 200))
@@ -156,7 +147,6 @@ def minimumPixelIntensityNeighborhoodFiltering(s,params):
     imgfilt = rank.minimum(img, selem)
     s["img_mask_bright"] = imgfilt > threshold
 
-
     if strtobool(params.get("invert", "True")):
         s["img_mask_bright"] = ~s["img_mask_bright"]
 
@@ -176,7 +166,8 @@ def minimumPixelIntensityNeighborhoodFiltering(s,params):
 
     return
 
-def saveEqualisedImage(s,params):
+
+def saveEqualisedImage(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage")
 
     img = s.getImgThumb(s["image_work_size"])
diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py
index 6d3733f..610141c 100644
--- a/histoqc/LocalTextureEstimationModule.py
+++ b/histoqc/LocalTextureEstimationModule.py
@@ -1,10 +1,8 @@
 import logging
 import numpy as np
-from skimage import  color
+from skimage import color
 from distutils.util import strtobool
 from skimage.feature import graycomatrix, graycoprops
-import matplotlib.pyplot as plt
-
 
 
 def estimateGreyComatrixFeatures(s, params):
@@ -15,17 +13,17 @@ def estimateGreyComatrixFeatures(s, params):
     patch_size = int(params.get("patch_size", 32))
     npatches = int(params.get("npatches", 100))
     nlevels = int(params.get("nlevels", 8))
-    feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':')
+    feats = params.get("feats", "contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':')
     invert = strtobool(params.get("invert", "False"))
-    mask_name = params.get("mask_name","img_mask_use")
-
+    mask_name = params.get("mask_name", "img_mask_use")
 
     img = s.getImgThumb(s["image_work_size"])
     img = color.rgb2gray(img)
 
     mask = s[mask_name] if not invert else ~s[mask_name]
     if len(mask.nonzero()[0]) == 0:  # add warning in case the no tissus detected in mask
-        msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask"
+        msg = (f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}"
+               f" Can not estimate the empty mask since NO tissue remains detectable in mask")
         logging.warning(f"{s['filename']} - {msg}")
         s["warnings"].append(msg)
         return
@@ -36,10 +34,10 @@ def estimateGreyComatrixFeatures(s, params):
 
     results = []
 
-    for id in idx:
-        r, c = maskidx[id, :]
+    for index in idx:
+        r, c = maskidx[index, :]
         patch = img[r:r + patch_size, c:c + patch_size]
-        glcm = graycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5],
+        glcm = graycomatrix(np.digitize(patch, np.linspace(0, 1, num=nlevels), right=True), distances=[5],
                             angles=[0], levels=nlevels, symmetric=True, normed=True)
 
         results.append([graycoprops(glcm, prop=feat) for feat in feats])
diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py
index 68e8437..d660cfe 100644
--- a/histoqc/MorphologyModule.py
+++ b/histoqc/MorphologyModule.py
@@ -2,12 +2,10 @@
 import os
 import numpy as np
 from histoqc.BaseImage import printMaskHelper
-from skimage import io, morphology, img_as_ubyte, measure
-
+from skimage import io, morphology, measure
+from skimage.util import img_as_ubyte
 from scipy import ndimage as ndi
-
-import matplotlib.pyplot as plt  # these 2 are used for debugging
-from histoqc.SaveModule import blend2Images #for easier debugging
+from typing import cast
 
 
 def removeSmallObjects(s, params):
@@ -22,7 +20,6 @@ def removeSmallObjects(s, params):
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = img_reduced
 
-
     rps = measure.regionprops(morphology.label(img_small))
     if rps:
         areas = np.asarray([rp.area for rp in rps])
@@ -36,14 +33,9 @@ def removeSmallObjects(s, params):
     s.addToPrintList("small_tissue_removed_mean_area", str(area_mean))
     s.addToPrintList("small_tissue_removed_max_area", str(area_max))
 
-
-
-
-
     s.addToPrintList("small_tissue_removed_percent",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
 
-
     if len(s["img_mask_use"].nonzero()[0]) == 0:  # add warning in case the final tissue is empty
         logging.warning(f"{s['filename']} - After MorphologyModule.removeSmallObjects: NO tissue "
                         f"remains detectable! Downstream modules likely to be incorrect/fail")
@@ -58,8 +50,8 @@ def remove_large_objects(img, max_size):
     selem = ndi.generate_binary_structure(img.ndim, 1)
     ccs = np.zeros_like(img, dtype=np.int32)
     ndi.label(img, selem, output=ccs)
-    component_sizes = np.bincount(ccs.ravel())
-    too_big = component_sizes > max_size
+    component_sizes: np.ndarray = np.bincount(ccs.ravel())
+    too_big: np.ndarray = cast(np.ndarray, component_sizes > max_size)
     too_big_mask = too_big[ccs]
     img_out = img.copy()
     img_out[too_big_mask] = 0
@@ -76,7 +68,7 @@ def removeFatlikeTissue(s, params):
     img_small = img_reduced & np.invert(s["img_mask_use"])
     img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9)
 
-    mask_dilate = morphology.dilation(img_small, selem=np.ones((kernel_size, kernel_size)))
+    mask_dilate = morphology.dilation(img_small, footprint=np.ones((kernel_size, kernel_size)))
     mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size)
 
     mask_fat = mask_dilate & ~mask_dilate_removed
@@ -100,8 +92,6 @@ def removeFatlikeTissue(s, params):
     s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean))
     s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max))
 
-
-
     s.addToPrintList("fatlike_tissue_removed_percent",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
 
@@ -118,7 +108,6 @@ def fillSmallHoles(s, params):
     img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size)
     img_small = img_reduced & np.invert(s["img_mask_use"])
 
-
     io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small))
     s["img_mask_small_removed"] = (img_small * 255) > 0
 
diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py
index 627f8ca..cd04be5 100644
--- a/histoqc/SaveModule.py
+++ b/histoqc/SaveModule.py
@@ -1,17 +1,17 @@
 import logging
 import os
-from skimage import io, img_as_ubyte
+from skimage import io
+from skimage.util import img_as_ubyte
 from distutils.util import strtobool
 from skimage import color
 import numpy as np
-
-import matplotlib.pyplot as plt
+from histoqc.BaseImage import BaseImage
 
 
 def blend2Images(img, mask):
-    if (img.ndim == 3):
+    if img.ndim == 3:
         img = color.rgb2gray(img)
-    if (mask.ndim == 3):
+    if mask.ndim == 3:
         mask = color.rgb2gray(mask)
     img = img[:, :, None] * 1.0  # can't use boolean
     mask = mask[:, :, None] * 1.0
@@ -19,7 +19,7 @@ def blend2Images(img, mask):
     return out
 
 
-def saveFinalMask(s, params):
+def saveFinalMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tsaveUsableRegion")
 
     mask = s["img_mask_use"]
@@ -36,37 +36,42 @@ def saveFinalMask(s, params):
     return
 
 
-def saveAssociatedImage(s, key:str, dim:int):
+def saveAssociatedImage(s: BaseImage, key: str, dim: int):
     logging.info(f"{s['filename']} - \tsave{key.capitalize()}")
-    osh = s["os_handle"]
+    image_handle = s.image_handle
 
-    if not key in osh.associated_images:
+    if key not in image_handle.associated_images:
         message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images"
         logging.warning(message)
         s["warnings"].append(message)
         return
     
     # get asscociated image by key
-    associated_img = osh.associated_images[key]
-    (width, height)  = associated_img.size
-
-    # calulate the width or height depends on dim
-    if width > height:
-        h = round(dim * height / width)
-        size = (dim, h)
-    else:
-        w = round(dim * width / height)
-        size = (w, dim)
-    
-    associated_img = associated_img.resize(size)
-    associated_img = np.asarray(associated_img)[:, :, 0:3]
-    io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img)
+    associated_img = image_handle.associated_images[key]
+    width, height = image_handle.__class__.backend_dim(associated_img)
+
+    if width * height == 0:
+        message = f"{s['filename']}- Irregular Size {width, height} of the Associated Images: {key}"
+        logging.warning(message)
+        s["warnings"].append(message)
+        return
+
+    aspect_ratio = width / height
+    size = image_handle.__class__.curate_to_max_dim(width, height, max_size=dim, aspect_ratio=aspect_ratio)
+    # to pillow handle
+    associated_img = image_handle.__class__.backend_to_pil(associated_img)
+    # resize the pil (RGB)
+    associated_img = associated_img.resize(size).convert("RGB")
+    # save the pil
+    associated_img.save(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png")
+
 
 def saveMacro(s, params):
     dim = params.get("small_dim", 500)
     saveAssociatedImage(s, "macro", dim)
     return
-    
+
+
 def saveMask(s, params):
     logging.info(f"{s['filename']} - \tsaveMaskUse")
     suffix = params.get("suffix", None)
@@ -80,6 +85,7 @@ def saveMask(s, params):
     # save mask
     io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"]))
 
+
 def saveThumbnails(s, params):
     logging.info(f"{s['filename']} - \tsaveThumbnail")
     # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one
diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py
index 0101ca4..07bf184 100644
--- a/histoqc/TileExtractionModule.py
+++ b/histoqc/TileExtractionModule.py
@@ -4,7 +4,6 @@
 are open.
 """
 import os
-import openslide
 import json
 from histoqc.BaseImage import BaseImage
 from typing import Callable, Dict, Any, List, Tuple, Union
@@ -14,7 +13,8 @@
 from contextlib import contextmanager
 from distutils.util import strtobool
 import logging
-from histoqc.import_wrapper.typing import Literal, get_args
+from typing_extensions import Literal, get_args
+from PIL.Image import Image as PILImage
 # from histoqc.import_wrapper.helper import dynamic_import
 # __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]]
 # Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions")
@@ -269,10 +269,10 @@ def max_tile_bbox_top_left_coord(rp_bbox: TYPE_BBOX_INT, work_tile_size: float,
         tile_max_left = left_rp + max_step_horiz * work_stride
         tile_max_top = top_rp + max_step_vert * work_stride
 
-        assert round(tile_max_left + work_tile_size) <= right_rp,\
+        assert round(tile_max_left + work_tile_size) <= right_rp, \
             f"left + size check" \
             f" {tile_max_left + work_tile_size} = {tile_max_left} + {work_tile_size} <= {right_rp} fail"
-        assert round(tile_max_top + work_tile_size) <= bottom_rp,\
+        assert round(tile_max_top + work_tile_size) <= bottom_rp, \
             f"top + size check" \
             f" {tile_max_top + work_tile_size} = {tile_max_top} + {work_tile_size} <= {bottom_rp} fail"
         return int(tile_max_top), int(tile_max_left)
@@ -345,8 +345,6 @@ def _tile_windows_helper(mask_use_for_tiles,
 
         """
         mask = mask_use_for_tiles
-        # image_handle: openslide.OpenSlide = s["os_handle"]
-        # img_w, img_h = image_handle.dimensions
         assert mask is not None, f"{filename}: mask is not initialized"
         assert isinstance(mask, np.ndarray), f"The mask is expected to be a Numpy NDArray"
 
@@ -393,7 +391,7 @@ def tile_windows(self,
         root_dict = self.__tile_window_cache
         entry = root_dict.get(key, None)
         if entry is None or force_rewrite:
-            # img_w, img_h = s['os_handle'].dimensions
+            # img_w, img_h = s.image_handle.dimensions
             root_dict[key] = TileExtractor._tile_windows_helper(mask_use_for_tiles, img_w, img_h,
                                                                 tile_size,
                                                                 tile_stride, tissue_thresh)
@@ -532,7 +530,7 @@ def valid_tile_extraction(self,
         tw: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h,
                                                 tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite)
         window_list_of_regions = tw.windows_on_original_image
-        image_handle: openslide.OpenSlide = s["os_handle"]
+        image_handle = s.image_handle
         valid_window_list_all_regions: List[List[Tuple[int, int, int, int]]] = []
         for region_windows in window_list_of_regions:
             region_windows: List[Tuple[int, int, int, int]]
@@ -541,7 +539,7 @@ def valid_tile_extraction(self,
                 window: Tuple[int, int, int, int]
                 # just to make the convention clear
                 location, size = TileExtractor.__window_convert(window)
-                region = image_handle.read_region(location, 0, size)
+                region: PILImage = image_handle.read_region(location, 0, size)
                 tile_np = np.array(region, copy=False)
                 valid_flag = screen_callbacks(tile_np)
                 if not valid_flag:
@@ -573,7 +571,7 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]):
 
         img_use_for_tiles = s.getImgThumb(s["image_work_size"])
         mask_use_for_tiles = s['img_mask_use']
-        image_handle: openslide.OpenSlide = s['os_handle']
+        image_handle = s.image_handle
         img_w, img_h = image_handle.dimensions
 
         tile_extractor = TileExtractor(s)
diff --git a/histoqc/__main__.py b/histoqc/__main__.py
index c0bd0c0..415f479 100644
--- a/histoqc/__main__.py
+++ b/histoqc/__main__.py
@@ -30,7 +30,9 @@ def main(argv=None):
     if argv is None:
         argv = sys.argv[1:]
 
-    parser = argparse.ArgumentParser(prog="histoqc", description='Run HistoQC main quality control pipeline for digital pathology images')
+    parser = argparse.ArgumentParser(prog="histoqc",
+                                     description='Run HistoQC main quality control pipeline'
+                                                 ' for digital pathology images')
     parser.add_argument('input_pattern',
                         help="input filename pattern (try: *.svs or target_path/*.svs ),"
                              " or tsv file containing list of files to analyze",
@@ -76,7 +78,7 @@ def main(argv=None):
         lm.logger.warning(f"Configuration file not set (--config), using default")
         config.read_string(read_config_template('default'))
     elif os.path.exists(args.config):
-        config.read(args.config) #Will read the config file
+        config.read(args.config)  # Will read the config file
     else:
         lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.")
         config.read_string(read_config_template(args.config))
@@ -223,5 +225,6 @@ def main(argv=None):
             )
     return 0
 
+
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py
index 5012ce4..6c1a4e1 100644
--- a/histoqc/_pipeline.py
+++ b/histoqc/_pipeline.py
@@ -17,6 +17,8 @@
 from importlib import import_module
 from logging.config import dictConfig
 from logging.handlers import QueueHandler
+from typing_extensions import Literal
+from typing import cast
 
 
 # --- logging helpers -------------------------------------------------
@@ -63,7 +65,8 @@ def setup_logging(*, capture_warnings, filter_warnings):
     })
 
     # configure warnings too...
-    warnings.filterwarnings(filter_warnings)
+    filter_type = Literal["default", "error", "ignore", "always", "module", "once"]
+    warnings.filterwarnings(cast(filter_type, filter_warnings))
     logging.captureWarnings(capture_warnings)
 
 
@@ -310,7 +313,7 @@ def write_headers(self, *args):
 
         Parameters
         ----------
-        state : dict
+        state: dict
             the current histoqc implementation writes the outputs to
             the header files, so *args supports `state` for now.
             overwrite in subclass to control header output behavior
diff --git a/histoqc/_worker.py b/histoqc/_worker.py
index 97e5ff6..54af16d 100644
--- a/histoqc/_worker.py
+++ b/histoqc/_worker.py
@@ -62,13 +62,9 @@ def worker(idx, file_name, *,
         raise exc
 
     else:
-        # TODO:
-        #   the histoqc workaround below is due an implementation detail in BaseImage:
-        #   BaseImage keeps an OpenSlide instance stored under os_handle and leaks a
-        #   file handle. This will need fixing in BaseImage.
-        #   -> best solution would be to make BaseImage a contextmanager and close
-        #      and cleanup the OpenSlide handle on __exit__
-        s["os_handle"] = None  # need to get rid of handle because it can't be pickled
+        # So long as the gc is triggered to delete the handle, the close is called to release the resources,
+        # as documented in the openslide and cuimage's source code.
+        s.image_handle = None
         return s
 
 
diff --git a/histoqc/annotations/annot_collection.py b/histoqc/annotations/annot_collection.py
index 30a187c..42eab60 100644
--- a/histoqc/annotations/annot_collection.py
+++ b/histoqc/annotations/annot_collection.py
@@ -2,7 +2,7 @@
 from types import MappingProxyType
 # from shapely.strtree import STRtree
 # from shapely.geometry import box as shapely_box
-from histoqc.import_wrapper.typing import Literal, get_args
+from typing_extensions import Literal, get_args
 from lazy_property import LazyProperty
 from .annotation.base import Annotation, Region, TYPE_RAW_LABEL
 from .annotation.imagescope import ImageScopeAnnotation
diff --git a/histoqc/annotations/annotation/base.py b/histoqc/annotations/annotation/base.py
index 36da725..d549e51 100644
--- a/histoqc/annotations/annotation/base.py
+++ b/histoqc/annotations/annotation/base.py
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar, Dict, Union, List, Tuple, TypedDict
+from typing import TypeVar, Dict, Union, List, Tuple, Generic
+from typing_extensions import TypedDict
 from lazy_property import LazyProperty
 from shapely.geometry import Polygon, MultiPolygon
 import logging
diff --git a/histoqc/annotations/annotation/geojson.py b/histoqc/annotations/annotation/geojson.py
index cf5bf2e..fc1175a 100644
--- a/histoqc/annotations/annotation/geojson.py
+++ b/histoqc/annotations/annotation/geojson.py
@@ -1,5 +1,5 @@
 from typing import List, Dict, Callable, Any  # Literal, get_args
-from histoqc.import_wrapper.typing import Literal, get_args
+from typing_extensions import Literal, get_args
 from ..io_utils.json import load_json
 from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET
 
diff --git a/histoqc/config/__main__.py b/histoqc/config/__main__.py
index 3f2b1c0..493e09e 100644
--- a/histoqc/config/__main__.py
+++ b/histoqc/config/__main__.py
@@ -9,7 +9,7 @@ def main(argv=None):
     if argv is None:
         argv = sys.argv[1:]
 
-    parser = argparse.ArgumentParser(prog="histoqc.config",description="Show example configuration files")
+    parser = argparse.ArgumentParser(prog="histoqc.config", description="Show example configuration files")
     parser.add_argument('--list',
                         action='store_true',
                         help='list available configs')
diff --git a/histoqc/import_wrapper/__init__.py b/histoqc/import_wrapper/__init__.py
index e69de29..e216514 100644
--- a/histoqc/import_wrapper/__init__.py
+++ b/histoqc/import_wrapper/__init__.py
@@ -0,0 +1 @@
+from .helper import dynamic_import
diff --git a/histoqc/import_wrapper/helper.py b/histoqc/import_wrapper/helper.py
index 7804e3a..aa97227 100644
--- a/histoqc/import_wrapper/helper.py
+++ b/histoqc/import_wrapper/helper.py
@@ -1,23 +1,84 @@
+from __future__ import annotations
 import importlib
-from typing import Union
+from typing import Optional, List
 
 
-def dynamic_import(module_name: str, attribute_name: str, surrogate: Union[str, None]):
-    """
-    Dynamically import the components from surrogate module if not available (e.g., `Literal` is only available in
-    typing from python3.8 but typing_extension provides the same functionality for python <=3.7.
+def __dynamic_import(module_name: str, attribute_name: Optional[str]):
+    """Base function to import a module or a component from the module via importlib
+
     Args:
-        module_name:
-        attribute_name:
-        surrogate:
+        module_name: name of the module
+        attribute_name: name of the attribute. If None, then returns the module itself.
 
     Returns:
+        imported module or component
 
+    Raises
+        ImportError: if the module cannot be imported or the attribute is not found.
     """
+    assert module_name is not None and isinstance(module_name, str)
     module = importlib.import_module(module_name)
-    attribute = getattr(module, attribute_name, None)
-    if attribute is not None:
-        return attribute
-    if surrogate is not None:
-        return dynamic_import(surrogate, attribute_name, None)
-    raise ImportError(f"Cannot Import {attribute_name} from either {module_name} or {surrogate}")
+    if attribute_name is None:
+        return module
+    if not hasattr(module, attribute_name):
+        raise ImportError(f"'{module_name}' has no attribute '{attribute_name}'")
+    return getattr(module, attribute_name)
+
+
+def __validate_names(names: str | List[str], pad_length: Optional[int] = 1) -> List[Optional[str]]:
+    """Validate the names provided to be not None and in form of a list.
+
+    If the names is a str, returns a list with a singleton value of names and length of pad_length.
+
+    Args:
+        names: the value to be validated
+        pad_length: length to
+
+    Returns:
+        List of names.
+    """
+    if isinstance(names, str) or names is None:
+        return [names] * pad_length
+    if isinstance(names, list) and len(names) == 1 and pad_length > 1:
+        return names * pad_length
+    assert isinstance(names, List), f"{type(names)}"
+    return names
+
+
+def dynamic_import(module_names: List[str] | str,
+                   attr_names: Optional[List[Optional[str]] | str],
+                   return_first: bool):
+    """Dynamically import the module or attribute from the modules via importlib.
+
+    Priority is defined by the order of module_names/attr_names. The function will continue to try to import the module
+    until all attempts fail and raise an ImportError in this case.
+    If return_first is True, the function only returns the first viable module/attribute. Otherwise, it returns the list
+    of all available modules/attributes.
+
+    Args:
+        module_names: names of the modules. If it is str it will be padded to a list of single element: [names]
+        attr_names: names of the attributes. If None, then only the module is imported.
+            If it is a str or a list of single element, it will be padded to a list of same values
+            with same length as module_names.
+        return_first: If return_first is True, the function only returns the first viable module/attribute.
+    Returns:
+        imported module or attribute
+
+    Raises:
+        ImportError: if the module cannot be imported or the attribute is not found.
+    """
+    module_names = __validate_names(module_names)
+    attr_names = __validate_names(attr_names, pad_length=len(module_names))
+    assert len(module_names) == len(attr_names)
+    out_list = []
+    for module, attr in zip(module_names, attr_names):
+        try:
+            result = __dynamic_import(module, attr)
+            if return_first:
+                return result
+            out_list.append(result)
+        except ImportError:
+            continue
+    if len(out_list) == 0:
+        raise ImportError(f"Cannot Import from: {module_names}, {attr_names}")
+    return out_list
diff --git a/histoqc/import_wrapper/openslide.py b/histoqc/import_wrapper/openslide.py
index aaa007f..2e7c67c 100644
--- a/histoqc/import_wrapper/openslide.py
+++ b/histoqc/import_wrapper/openslide.py
@@ -1,9 +1,16 @@
+"""
+For python >=3.8, the behavior of import dlls in Windows is changed. add_dll_directory is added to os and must be
+manually called to include the path(s) of binaries. (in previous versions, extending the PATH variable is enough)
+"""
 import os
 if hasattr(os, "add_dll_directory"):
     # specify your own openslide binary locations
     with os.add_dll_directory(os.path.join(os.getcwd(), 'bin')):
+        # noinspection PyUnresolvedReferences
         import openslide
 else:
     # os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH']
     # #can either specify openslide bin path in PATH, or add it dynamically
+    # noinspection PyUnresolvedReferences
     import openslide
+
diff --git a/histoqc/import_wrapper/typing.py b/histoqc/import_wrapper/typing.py
deleted file mode 100644
index 4e4ed03..0000000
--- a/histoqc/import_wrapper/typing.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from .helper import dynamic_import
-from typing import Type, Callable, Tuple, Any, TypeVar
-
-__TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]]
-Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions")
-get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions")
diff --git a/histoqc/tests/test_pipeline_cli.py b/histoqc/tests/test_pipeline_cli.py
index 244a075..2ebcf9d 100644
--- a/histoqc/tests/test_pipeline_cli.py
+++ b/histoqc/tests/test_pipeline_cli.py
@@ -56,6 +56,7 @@ def minimal_config(tmp_path_factory):
 
             [BaseImage.BaseImage]
             image_work_size = 1.25x
+            handles = openslide
 
             #three options: relative2mask, absolute, relative2image
             mask_statistics = relative2mask
@@ -87,7 +88,8 @@ def test_cli_multiprocess_batching(multi_svs_dir, tmp_path, minimal_config, tmp_
         '--symlink', os.fspath(spth),
         '*.svs'
     ]) == 0
-    assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log', 'results_0.tsv', 'results_1.tsv'])
+    assert _filenames_in(tmp_path) == _filenames_in(multi_svs_dir).union(['error.log',
+                                                                          'results_0.tsv', 'results_1.tsv'])
 
 
 @pytest.fixture(scope='module')
diff --git a/histoqc/tests/test_ui_cli.py b/histoqc/tests/test_ui_cli.py
index c72aa4e..ff91739 100644
--- a/histoqc/tests/test_ui_cli.py
+++ b/histoqc/tests/test_ui_cli.py
@@ -1,8 +1,6 @@
 import os
 import threading
 import time
-from http.server import HTTPServer
-from typing import Optional
 
 import pytest
 import requests
diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py
new file mode 100644
index 0000000..c5b5713
--- /dev/null
+++ b/histoqc/wsi_handles/base.py
@@ -0,0 +1,340 @@
+from abc import ABC, abstractmethod
+from histoqc.import_wrapper import dynamic_import
+import logging
+from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic
+import numpy as np
+from PIL.Image import Image as PILImage
+from typing_extensions import final
+import os
+
+from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER
+
+T = TypeVar('T')
+Backend = TypeVar('Backend')
+ARRAY = TypeVar('ARRAY')
+
+
+class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]):
+
+    handle: T
+    fname: str
+
+    @staticmethod
+    def curate_shorter_edge(width, height, limit, aspect_ratio):
+        """Simulate the PIL.Image.Image.thumbnail approach to curate the size.
+
+         The target size should preserve the aspect ratio.
+
+        Args:
+            width:
+            height:
+            limit:
+            aspect_ratio:
+
+        Returns:
+
+        """
+        if height > width:
+            # limit the shorter one
+            width = max(width, limit)
+            height = round(width / aspect_ratio)
+        else:
+            height = max(height, limit)
+            width = round(height * aspect_ratio)
+        return width, height
+
+    @staticmethod
+    def curate_to_max_dim(width, height, max_size, aspect_ratio):
+        """Set the longer one of width and height to max_size while preserving the aspect ratio.
+
+        Args:
+            width:
+            height:
+            max_size:
+            aspect_ratio:
+
+        Returns:
+            width, height tuple
+        """
+        if height > width:
+            height = max_size
+            width = round(height * aspect_ratio)
+        else:
+            width = max_size
+            height = round(width / aspect_ratio)
+        return width, height
+
+    @property
+    @abstractmethod
+    def associated_images(self) -> Mapping[str, Backend]:
+        ...
+
+    @property
+    @abstractmethod
+    def background_color(self) -> str:
+        ...
+        
+    @property
+    @abstractmethod
+    def bounding_box(self) -> Tuple[int, int, int, int]:
+        ...
+
+    @property
+    @abstractmethod
+    def has_bounding_box(self) -> bool:
+        ...
+    
+    @property
+    @abstractmethod
+    def dimensions(self) -> Tuple[int, int]:
+        """
+
+        Returns:
+            (width, height) tuple
+        """
+        ...
+
+    @property
+    @abstractmethod
+    def magnification(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def level_count(self) -> int:
+        ...
+
+    @property
+    @abstractmethod
+    def level_dimensions(self) -> Sequence[Tuple[int, int]]:
+        ...
+
+    @property
+    @abstractmethod
+    def level_downsamples(self) -> Sequence[float]:
+        ...
+
+    @property
+    @abstractmethod
+    def vendor(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def mpp_x(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def mpp_y(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def comment(self) -> str:
+        ...
+
+    @abstractmethod
+    def get_thumbnail(self, new_dim) -> Union[ARRAY, Backend]:
+        ...
+
+    @abstractmethod
+    def backend_rgba2rgb(self, img) -> Backend:
+        """Remove the alpha channel with a predefined background color blended into the image.
+
+        Args:
+            img:
+
+        Returns:
+            R
+        """
+        ...
+
+    @abstractmethod
+    def region_backend(self, location, level, size, **kwargs):
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def backend_to_pil(region: Union[Backend, ARRAY]) -> PILImage:
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def array_to_numpy(arr: ARRAY) -> np.ndarray:
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def backend_dim(region: Backend) -> Tuple[int, int]:
+        """
+        Defines the unified interface to obtain BACKEND handle type.
+        Args:
+            region:
+
+        Returns:
+
+        """
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def array_shape(arr: ARRAY) -> Tuple[int, int]:
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def backend_to_array(region: Union[Backend, ARRAY]) -> ARRAY:
+        ...
+    
+    def read_region(self, location, level, size, **kwargs) -> PILImage:
+        region = self.region_backend(location=location, level=level, size=size, **kwargs)
+        return self.__class__.backend_to_pil(region)
+
+    @abstractmethod
+    def read_label(self) -> Backend:
+        ...
+
+    @abstractmethod
+    def read_macro(self) -> Backend:
+        ...
+
+    @classmethod
+    @abstractmethod
+    def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int]) -> ARRAY:
+        ...
+
+    @abstractmethod
+    def get_best_level_for_downsample(self, downsample_factor: float):
+        ...
+
+    def curated_best_level_for_downsample(self, downsample_factor: float) -> Tuple[int, bool]:
+        relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in self.level_downsamples]
+        level = np.where(relative_down_factors_idx)[0]
+        if level.size:
+            return level[0], True
+        return self.get_best_level_for_downsample(downsample_factor), False
+
+    @staticmethod
+    @abstractmethod
+    def grid_stack(grid: List[List[ARRAY]]):
+        ...
+
+    def resize_tile_downward(self, target_downsampling_factor, level,
+                             win_size: int = 2048, **read_region_kwargs) -> List[List[ARRAY]]:
+
+        (bx, by, bwidth, bheight) = self.bounding_box
+        end_x = bx + bwidth
+        end_y = by + bheight
+
+        closest_downsampling_factor = self.level_downsamples[level]
+
+        # create a new img
+        grid = []
+        for x in range(bx, end_x, win_size):
+            row_piece = []
+            for y in range(by, end_y, win_size):
+                win_width, win_height = [win_size] * 2
+                # Adjust extraction size for endcut
+                if end_x < x + win_width:
+                    win_width = end_x - x
+                if end_y < y + win_height:
+                    win_height = end_y - y
+
+                win_down_width = int(round(win_width / target_downsampling_factor))
+                win_down_height = int(round(win_height / target_downsampling_factor))
+
+                win_width = int(round(win_width / closest_downsampling_factor))
+                win_height = int(round(win_height / closest_downsampling_factor))
+
+                # TODO Note: this isn't very efficient, and if more efficiency isneeded
+
+                # TODO cont. Separate the public interface read_region -> PIL.Image to the internal data backend
+                # TODO (data_from_region)
+                # TODO e.g., cupy is far more efficient for resize w/ interpolation and antialiasing.
+                closest_region = self.region_backend(location=(x, y), level=level, size=(win_width, win_height),
+                                                     **read_region_kwargs)
+                if np.shape(closest_region)[-1] == 4:
+                    closest_region = self.backend_rgba2rgb(closest_region)
+                closest_region_arr = self.__class__.backend_to_array(closest_region)
+                target_region = self.__class__.region_resize_arr(closest_region_arr,
+                                                                 (win_down_width, win_down_height))
+                row_piece.append(target_region)
+            # row_piece = np.concatenate(row_piece, axis=0)
+            grid.append(row_piece)
+        # grid = np.concatenate(output, axis=1)
+        #
+        return self.__class__.grid_stack(grid)
+
+    def best_thumb(self, x: int, y: int, dims: Tuple[int, int],
+                   target_sampling_factor: float, **read_region_kwargs) -> ARRAY:
+
+        # get thumb from og
+        if not self.has_bounding_box:
+            max_dim = dims[0] if dims[0] > dims[1] else dims[1]
+            return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim)))
+
+        (level, is_exact_level) = self.curated_best_level_for_downsample(target_sampling_factor)
+
+        # check if to get the existing level
+        if is_exact_level:
+            backend: Backend = self.read_region((x, y), level, dims)
+            return self.__class__.backend_to_array(self.backend_rgba2rgb(backend)) \
+                if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend)
+        # scale down the thumb img from the next high level
+        else:
+            return self.resize_tile_downward(target_sampling_factor, level, win_size=2048, **read_region_kwargs)
+
+    @staticmethod
+    def parse_wsi_handles(handle_list: str | List[str], delimiter: str,
+                          wsi_handle_dict: Dict[str, Tuple[str, str]]) -> Tuple[List[str], List[str]]:
+        if isinstance(handle_list, str):
+            handle_list = handle_list.split(delimiter)
+        module_list = []
+        attr_list = []
+        for handle_type in handle_list:
+            handle_type = handle_type.strip()
+            if handle_type not in wsi_handle_dict:
+                msg = f"WSIImageHandle: \"{handle_type}\" is not a registered handle"
+                logging.warning(msg)
+                continue
+            result = wsi_handle_dict[handle_type]
+            assert len(result) == 2, f"{result}"
+            module, attr = wsi_handle_dict[handle_type]
+            module_list.append(module)
+            attr_list.append(attr)
+        return module_list, attr_list
+
+    @classmethod
+    def __create_handle(cls, fname: str,
+                        handle_class_list: List[Callable[[str], "WSIImageHandle"]]) -> "WSIImageHandle":
+        image_handle = None
+        assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file"
+        for handle_class in handle_class_list:
+            # noinspection PyBroadException
+            try:
+                image_handle = handle_class(fname)
+            except Exception:
+                # current wsi handle class doesn't support this file
+                msg = f"WSIImageHandle: \"{handle_class}\" doesn't support {fname}"
+                logging.warning(msg)
+                continue
+        if image_handle is None:
+            # error: no handles support this file
+            msg = f"WSIImageHandle: can't find the support wsi handles - {fname}"
+            logging.error(msg)
+            raise NotImplementedError(msg)
+        return image_handle
+
+    @classmethod
+    @final
+    def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle":
+        # get handles list
+        module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES)
+        handle_class_list = dynamic_import(module_list, attr_list, return_first=False)
+        image_handle = cls.__create_handle(fname, handle_class_list)
+        return image_handle
+
+    def __init__(self, fname: str):
+        self.fname = fname
+
diff --git a/histoqc/wsi_handles/constants.py b/histoqc/wsi_handles/constants.py
new file mode 100644
index 0000000..16a297b
--- /dev/null
+++ b/histoqc/wsi_handles/constants.py
@@ -0,0 +1,20 @@
+from typing import Dict, Tuple
+from typing_extensions import Literal, get_args
+
+TYPE_OPENSLIDE = Literal["openslide"]
+KEY_OPENSLIDE: str = get_args(TYPE_OPENSLIDE)[0]
+MODULE_OPENSLIDE: str = "histoqc.wsi_handles.openslide_handle"
+CLASS_OPENSLIDE: str = "OpenSlideHandle"
+
+TYPE_CUCIM = Literal["cucim"]
+KEY_CUCIM: str = get_args(TYPE_CUCIM)[0]
+MODULE_CUCIM: str = "histoqc.wsi_handles.cuimage_handle"
+CLASS_CUCIM: str = "CuImageHandle"
+
+WSI_HANDLES: Dict[str, Tuple[str, str]] = {
+    KEY_OPENSLIDE: (MODULE_OPENSLIDE, CLASS_OPENSLIDE),
+    # todo: add unified interface
+    # KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM),
+}
+
+HANDLE_DELIMITER = ','
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
new file mode 100644
index 0000000..e37172a
--- /dev/null
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+from PIL.Image import Image as PILImage
+from cucim.clara import CuImage
+from .base import WSIImageHandle
+from PIL import Image
+from ..import_wrapper.openslide import openslide
+import cupy as cp
+from typing import List, Union, Tuple, Mapping
+from typing import cast
+from lazy_property import LazyProperty
+import numpy as np
+from cucim import skimage as c_skimage
+
+
+class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
+
+    handle: CuImage
+    fname: str
+
+    # TODO: standalone parser of vendor information
+    dummy_handle: openslide.OpenSlide
+
+    def backend_rgba2rgb(self, img: CuImage) -> CuImage:
+        # todo: it appears that CuImage does not take care of the alpha channel at all.
+        return img
+
+    @classmethod
+    def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int]) -> cp.ndarray:
+        w, h, *_ = new_size_wh
+        arr = cp.array(data)
+        return c_skimage.transform.resize(arr, output_shape=(h, w))
+
+    def __init__(self, fname: str):
+        super().__init__(fname)
+        self.handle = CuImage(fname)
+        # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a
+        # todo - native unified metadata interface for different vendors.
+        self.dummy_handle = openslide.OpenSlide(fname)
+
+    @LazyProperty
+    def background_color(self):
+        return f"#{self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+
+    @LazyProperty
+    def bounding_box(self):
+        dim_width, dim_height = self.dimensions
+        x = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0))
+        y = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0))
+        width = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width))
+        height = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height))
+        return x, y, width, height
+
+    @LazyProperty
+    def has_bounding_box(self):
+        return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties
+                and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties
+                and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle.properties
+                and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle.properties
+                )
+
+    @LazyProperty
+    def dimensions(self):
+        return tuple(self.handle.metadata['cucim']['shape'][:2][::-1])
+
+    @LazyProperty
+    def magnification(self):
+        return self.dummy_handle.properties.get("openslide.objective-power") or \
+            self.dummy_handle.properties.get("aperio.AppMag")
+
+    @property
+    def level_count(self):
+        return self.handle.metadata['cucim']['resolutions']['level_count']
+
+    @property
+    def level_dimensions(self):
+        return self.handle.metadata['cucim']['resolutions']['level_dimensions']
+
+    @property
+    def level_downsamples(self):
+        return self.handle.metadata['cucim']['resolutions']['level_downsamples']
+
+    @property
+    def vendor(self):
+        return self.dummy_handle.properties.get("openslide.vendor", "NA")
+
+    @property
+    def mpp_x(self):
+        return self.dummy_handle.properties.get("openslide.mpp-x", "NA")
+
+    @property
+    def mpp_y(self):
+        return self.dummy_handle.properties.get("openslide.mpp-y", "NA")
+
+    @property
+    def comment(self):
+        return self.dummy_handle.properties.get("openslide.comment", "NA")
+
+    def get_thumbnail(self, new_dim):
+        """Get thumbnail
+
+        Args:
+            new_dim: Tuple
+
+        Returns:
+
+        """
+        # from openslide
+        downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim)))
+        level = self.get_best_level_for_downsample(downsample)
+        thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level]))
+        # resize
+        thumb_cp = cp.array(thumb, copy=False)
+        target_w, target_h = (x // int(downsample) for x in self.dimensions)
+        aspect_ratio = self.dimensions[0] / self.dimensions[1]
+
+        target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
+        return c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w))
+
+    def get_best_level_for_downsample(self, down_factor: float) -> int:
+        """Return the largest level that's smaller than the target downsample factor, consistent with openslide.
+
+        Args:
+            down_factor:
+
+        Returns:
+
+        """
+        level_downsamples_arr = np.asarray(self.level_downsamples)
+        # not exceeding the current downsample level
+        down_indices = np.where(level_downsamples_arr <= down_factor)[0]
+        down_values = level_downsamples_arr[down_indices]
+        # find the indices of the down_indices that points to the best downsample factor value
+        return down_indices[down_values.argmax()]
+
+    def region_backend(self, location, level, size, **kwargs):
+        return self.handle.read_region(location=location, level=level, size=size, **kwargs)
+
+    @staticmethod
+    def backend_to_array(region: Union[CuImage, cp.ndarray]) -> cp.ndarray:
+        return cp.array(region, copy=False)
+
+    @staticmethod
+    def array_to_numpy(arr: cp.ndarray) -> np.ndarray:
+        return arr.get()
+
+    @classmethod
+    def backend_to_pil(cls, region: CuImage) -> PILImage:
+        return Image.fromarray(cls.backend_to_array(region).get())
+
+    def read_label(self) -> CuImage:
+        return self.handle.associated_image("label")
+
+    def read_macro(self) -> CuImage:
+        return self.handle.associated_image("macro")
+
+    @LazyProperty
+    def associated_images(self) -> Mapping:
+        keys = self.handle.associated_images
+        return {k: self.handle.associated_image(k) for k in keys}
+
+    @staticmethod
+    def grid_stack(grid: List[List[cp.ndarray]]):
+        return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1)
+
+    @staticmethod
+    def backend_dim(region: CuImage) -> Tuple[int, int]:
+        return cast(Tuple[int, int], tuple(region.size()[:2]))
+
+    @staticmethod
+    def array_shape(arr: cp.ndarray) -> Tuple[int, ...]:
+        return arr.shape
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
new file mode 100644
index 0000000..6388329
--- /dev/null
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -0,0 +1,145 @@
+import PIL.Image
+import numpy as np
+
+from .base import WSIImageHandle
+from histoqc.import_wrapper.openslide import openslide
+from typing import Union, Tuple, Sequence, List, Mapping
+from typing import cast
+from PIL.Image import Image as PILImage
+from .utils import rgba2rgb_pil
+from PIL import Image
+
+
+class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]):
+    _background_color: str
+    _magnification_factor: str
+    _has_bounding_box: bool
+    fname: str
+    handle: openslide.OpenSlide
+
+    def backend_rgba2rgb(self, img) -> PILImage:
+        return rgba2rgb_pil(img, self.background_color)
+
+    def __init__(self, fname):
+        super().__init__(fname)
+        self.handle = openslide.OpenSlide(fname)
+        self._has_bounding_box = True
+        self._bounding_box = self.__get_bounding_box()
+        
+        # get magnification factor from wsi slide
+        self._magnification_factor = self.handle.properties.get("openslide.objective-power") or \
+            self.handle.properties.get("aperio.AppMag")
+        
+        # get background color 
+        self._background_color = f"#{self.handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+
+    def __get_bounding_box(self) -> Tuple[int, int, int, int]:
+        (dim_width, dim_height) = self.handle.dimensions
+    
+        try:
+            x = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA'))
+            y = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA'))
+            width = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA'))
+            height = int(self.handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA'))
+            return x, y, width, height
+        # if any attribute is 'NA' and fails the int() cast
+        except ValueError:
+            self._has_bounding_box = False
+            return 0, 0, dim_width, dim_height
+
+    @property
+    def background_color(self):
+        return self._background_color
+
+    @property
+    def has_bounding_box(self) -> bool:
+        return self._has_bounding_box
+    
+    @property
+    def bounding_box(self) -> Tuple[int, int, int, int]:
+        return self._bounding_box
+
+    @property
+    def dimensions(self) -> Tuple[int, int]:
+        return self.handle.dimensions
+
+    @property
+    def magnification(self) -> Union[str, None]:
+        return self._magnification_factor
+
+    @property
+    def level_count(self) -> int:
+        return self.handle.level_count
+
+    @property
+    def level_dimensions(self) -> Sequence[Tuple[int, int]]:
+        return self.handle.level_dimensions
+
+    @property
+    def level_downsamples(self):
+        return self.handle.level_downsamples
+
+    @property
+    def vendor(self):
+        return self.handle.properties.get("openslide.vendor", "NA")
+
+    @property
+    def mpp_x(self) -> str:
+        return self.handle.properties.get("openslide.mpp-x", "NA")
+
+    @property
+    def mpp_y(self) -> str:
+        return self.handle.properties.get("openslide.mpp-y", "NA")
+
+    @property
+    def comment(self) -> str:
+        return self.handle.properties.get("openslide.comment", "NA")
+
+    @classmethod
+    def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int]):
+        return np.array(Image.fromarray(data).resize(new_size_wh), copy=False)
+
+    def get_thumbnail(self, new_dim):
+        return self.handle.get_thumbnail(new_dim)
+
+    def get_best_level_for_downsample(self, down_factor):
+        return self.handle.get_best_level_for_downsample(down_factor)
+
+    def region_backend(self, location, level, size, **kwargs):
+        return self.handle.read_region(location, level, size)
+
+    @staticmethod
+    def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage:
+        if isinstance(region, np.ndarray):
+            return PIL.Image.fromarray(region)
+        return region
+
+    @staticmethod
+    def backend_to_array(region: PILImage) -> np.ndarray:
+        return np.array(region)
+
+    @staticmethod
+    def array_to_numpy(arr: np.ndarray) -> np.ndarray:
+        return np.array(arr)
+
+    def read_label(self):
+        return self.handle.associated_images["label"]
+
+    def read_macro(self):
+        return self.handle.associated_images["macro"]
+
+    @property
+    def associated_images(self) -> Mapping[str, PILImage]:
+        return self.handle.associated_images
+
+    @staticmethod
+    def grid_stack(grid: List[List[np.ndarray]]):
+        return np.concatenate([np.concatenate(row, axis=0) for row in grid], axis=1)
+
+    @staticmethod
+    def backend_dim(region: PILImage) -> Tuple[int, int]:
+        return cast(Tuple[int, int], region.size)
+
+    @staticmethod
+    def array_shape(arr: np.ndarray) -> Tuple[int, ...]:
+        return arr.shape
diff --git a/histoqc/wsi_handles/utils.py b/histoqc/wsi_handles/utils.py
new file mode 100644
index 0000000..fcceafc
--- /dev/null
+++ b/histoqc/wsi_handles/utils.py
@@ -0,0 +1,38 @@
+from PIL import Image
+from PIL.Image import Image as PILImage
+from typing import Union, Iterable
+
+
+def hex_to_rgb(hex_color: str):
+    if hex_color.startswith('#'):
+        hex_color = hex_color[1:]
+
+    if len(hex_color) != 6:
+        raise ValueError(f"Invalid hex triplets. Length: {len(hex_color)}")
+
+    rgb_color = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+    return rgb_color
+
+
+def _validate_numerics(data: Iterable[float]):
+    if not isinstance(data, Iterable):
+        return False
+    return all([isinstance(x, float) for x in data])
+
+
+def validate_color(background_color: Union[str, Iterable[float], float]):
+    # if str -> assume a hex triplet
+    if isinstance(background_color, str):
+        return hex_to_rgb(background_color)
+    # must be numeric, or sequence of numeric
+    if isinstance(background_color, float):
+        return background_color
+    assert _validate_numerics(background_color), (f"background color must be a hex triplet string, a number,"
+                                                  f" or a sequence of numbers")
+    return tuple(x for x in background_color)
+
+
+def rgba2rgb_pil(img: PILImage, background_color) -> PILImage:
+    thumb = Image.new("RGB", img.size, validate_color(background_color))
+    thumb.paste(img, None, img)
+    return thumb
diff --git a/pyproject.toml b/pyproject.toml
index 7fb7892..e068ec1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,3 +24,7 @@ exclude_lines = [
   "if MYPY:",
   "except ImportError:",
 ]
+
+[options.extras_require]
+cucim = ["cupy", "cucim"]
+dicom = ["wsidicom"]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index ac20fff..e6a092b 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,10 @@
         "version_scheme": "post-release",
     },
     setup_requires=['setuptools_scm'],
+    extras_require={
+        "dicom": ["wsidicom"],
+        "cucim": ["cucim", "cupy"],
+    },
     package_data={
         'histoqc.config': ['*.ini'],
         'histoqc.data': data_files,

From decaa815a9a8d66111c4d7563dafedff89c70800 Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Thu, 2 May 2024 09:06:28 -0400
Subject: [PATCH 2/6] Unified CPU/GPU interface and array adaptor. Modules
 updated

---
 histoqc/AnnotationModule.py               |   6 +-
 histoqc/BaseImage.py                      |  38 ++-
 histoqc/BasicModule.py                    |  32 ++-
 histoqc/BlurDetectionModule.py            |  31 ++-
 histoqc/BrightContrastModule.py           |  67 ++++-
 histoqc/BubbleRegionByRegion.py           |  60 ++--
 histoqc/ClassificationModule.py           | 144 ++++++----
 histoqc/DeconvolutionModule.py            |  14 +-
 histoqc/HistogramModule.py                |  34 ++-
 histoqc/LightDarkModule.py                |  69 +++--
 histoqc/LocalTextureEstimationModule.py   |  23 +-
 histoqc/MorphologyModule.py               |  74 ++---
 histoqc/SaveModule.py                     |  47 ++--
 histoqc/TileExtractionModule.py           |  15 +-
 histoqc/__main__.py                       |  10 +-
 histoqc/_worker.py                        |  14 +-
 histoqc/array_adapter/__init__.py         |   2 +
 histoqc/array_adapter/adapter.py          | 316 ++++++++++++++++++++++
 histoqc/array_adapter/array_api_compat.py |   4 +
 histoqc/array_adapter/func_mapping.py     |  51 ++++
 histoqc/array_adapter/implementation.py   |   0
 histoqc/array_adapter/typing.py           |  13 +
 histoqc/config/config.ini                 |   1 +
 histoqc/import_wrapper/cupy_extra.py      |   6 +
 histoqc/wsi_handles/base.py               |  25 ++
 histoqc/wsi_handles/constants.py          |   2 +-
 histoqc/wsi_handles/cuimage_handle.py     |  19 +-
 histoqc/wsi_handles/openslide_handle.py   |  10 +
 imported_functions_list.txt               |   0
 29 files changed, 880 insertions(+), 247 deletions(-)
 create mode 100644 histoqc/array_adapter/__init__.py
 create mode 100644 histoqc/array_adapter/adapter.py
 create mode 100644 histoqc/array_adapter/array_api_compat.py
 create mode 100644 histoqc/array_adapter/func_mapping.py
 create mode 100644 histoqc/array_adapter/implementation.py
 create mode 100644 histoqc/array_adapter/typing.py
 create mode 100644 histoqc/import_wrapper/cupy_extra.py
 create mode 100644 imported_functions_list.txt

diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py
index 67414f6..b36a7fc 100644
--- a/histoqc/AnnotationModule.py
+++ b/histoqc/AnnotationModule.py
@@ -1,6 +1,7 @@
 import logging
 from typing import List, Tuple
 from histoqc.BaseImage import printMaskHelper, BaseImage
+from histoqc.array_adapter import ArrayDevice, ArrayAdapter
 from skimage import io
 from skimage.util import img_as_ubyte
 import os
@@ -76,6 +77,8 @@ def getParams(s: BaseImage, params):
 
 def saveAnnotationMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetAnnotationMask")
+    # quite pointless to enforce GPU acceleration here. Force to use CPU mode
+    adaptor = ArrayAdapter.build(input_device=ArrayDevice.CPU, output_device=ArrayDevice.CPU)
 
     (ann_format, file_path, suffix) = getParams(s, params)
 
@@ -107,13 +110,14 @@ def saveAnnotationMask(s: BaseImage, params):
     (off_x, off_y, ncol, nrow) = s["img_bbox"]
     resize_factor = np.shape(s["img_mask_use"])[1] / ncol
     height, width = s["img_mask_use"].shape
+
     annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0
 
     mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png"
     io.imsave(mask_file_name, img_as_ubyte(annotationMask))
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = prev_mask & annotationMask
+    s["img_mask_use"] = adaptor.and_(prev_mask, annotationMask)
     s.addToPrintList("getAnnotationMask",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
 
diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py
index 91e1a7e..aa507b5 100644
--- a/histoqc/BaseImage.py
+++ b/histoqc/BaseImage.py
@@ -6,9 +6,10 @@
 import dill
 from distutils.util import strtobool
 import re
-from typing import Union, Tuple, cast
+from typing import Union, Tuple, cast, Optional
 from histoqc.wsi_handles.base import WSIImageHandle
-from histoqc.wsi_handles.constants import KEY_OPENSLIDE
+from histoqc.wsi_handles.constants import KEY_OPENSLIDE, KEY_CUCIM
+from histoqc.array_adapter.typing import TYPE_ARRAY
 _REGEX_MAG = r"^(\d?\.?\d*X?)"
 _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE)
 MAG_NA = None
@@ -35,18 +36,21 @@
 
 class BaseImage(dict):
 
-    __image_handle: WSIImageHandle
+    _image_handle: WSIImageHandle
 
     @property
     def image_handle(self) -> WSIImageHandle:
-        return self.__image_handle
+        return self._image_handle
 
     @image_handle.setter
     def image_handle(self, image_handle: WSIImageHandle):
-        self.__image_handle = image_handle
+        self._image_handle = image_handle
 
     def __init__(self, fname, fname_outdir, params):
         dict.__init__(self)
+        handles = params.get("handles", KEY_CUCIM)
+        # dynamically load wsi image handle
+        self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles)
 
         self.in_memory_compression = strtobool(params.get("in_memory_compression", "False"))
 
@@ -61,9 +65,6 @@ def __init__(self, fname, fname_outdir, params):
         self["dir"] = os.path.dirname(fname)
 
         # get handles from config
-        handles = params.get("handles", KEY_OPENSLIDE)
-        # dynamically load wsi image handle
-        self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles)
 
         self["image_base_size"] = self.image_handle.dimensions
         self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box", "False"))
@@ -95,16 +96,29 @@ def __init__(self, fname, fname_outdir, params):
 
         self["completed"] = []
 
+    @staticmethod
+    def is_img_data(key: str) -> bool:
+        return key.startswith("img") and key != "img_bbox"
+
+    def _sync_to_handle(self, key, value):
+        if not self.__class__.is_img_data(key):
+            return value
+        if hasattr(self, "_image_handle") and self.image_handle is not None:
+            value = self.image_handle.adapter.sync(value)
+        return value
+
     def __getitem__(self, key):
         value = super(BaseImage, self).__getitem__(key)
-        if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"):
+        if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key):
             value = dill.loads(zlib.decompress(value))
+
+        value = self._sync_to_handle(key, value)
         return value
 
     def __setitem__(self, key, value):
-        if hasattr(self, "in_memory_compression") and self.in_memory_compression and key.startswith("img"):
+        value = self._sync_to_handle(key, value)
+        if hasattr(self, "in_memory_compression") and self.in_memory_compression and self.__class__.is_img_data(key):
             value = zlib.compress(dill.dumps(value), level=5)
-
         return super(BaseImage, self).__setitem__(key, value)
 
     # setbounding box start coordinate and size
@@ -152,7 +166,7 @@ def validate_slide_size(size: str, assertion: bool = False):
         # for now just cast it to str
         return size
 
-    def getImgThumb(self, size: str):
+    def getImgThumb(self, size: str) -> Optional[TYPE_ARRAY]:
         # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't
         # really handle it that way, and trace of previous coding also suggest that there actually lack a params
         # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here.
diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py
index 4d4dbcd..fa90ff7 100644
--- a/histoqc/BasicModule.py
+++ b/histoqc/BasicModule.py
@@ -1,6 +1,7 @@
 import logging
 import os
 from histoqc.BaseImage import printMaskHelper
+from histoqc.array_adapter import ArrayAdapter, ArrayDevice
 from skimage.morphology import remove_small_objects, binary_opening, disk
 from skimage import io
 from skimage.util import img_as_ubyte
@@ -27,13 +28,23 @@ def finalComputations(s: BaseImage, params):
 
 def finalProcessingSpur(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tfinalProcessingSpur")
+
+    adapter = s.image_handle.adapter
     disk_radius = int(params.get("disk_radius", "25"))
-    selem = disk(disk_radius)
-    mask = s["img_mask_use"]
-    mask_opened = binary_opening(mask, selem)
-    mask_spur = ~mask_opened & mask
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", img_as_ubyte(mask_spur))
+    # selem = adapter(disk)(disk_radius)
+    mask = s["img_mask_use"]
+    mask_opened = adapter(binary_opening)(mask, footprint=disk(disk_radius))
+    # todo: it is safe to directly compare
+    # todo: ~mask_opened & mask directly as the device of both are synchronized by adapter.
+    # todo: but this assumes that an adapter is used in the module so
+    # todo: for now unless we implement an array proxy the best practice is to use explicit and_ method
+    # todo: to avoid mistakes
+    mask_spur = adapter.and_(~mask_opened, mask)
+    fname = os.path.join(s["outdir"], f"{s['filename']}_spur.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask_spur))
+    # io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", ArrayAdapter.move_to_device(mask_spur_ubyte,
+    #                                                                                           ArrayDevice.CPU))
 
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = mask_opened
@@ -52,13 +63,16 @@ def finalProcessingSpur(s: BaseImage, params):
 
 def finalProcessingArea(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tfinalProcessingArea")
+
+    adapter = s.image_handle.adapter
     area_thresh = int(params.get("area_threshold", "1000"))
     mask = s["img_mask_use"]
 
-    mask_opened = remove_small_objects(mask, min_size=area_thresh)
-    mask_removed_area = ~mask_opened & mask
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area))
+    mask_opened = adapter(remove_small_objects)(mask, min_size=area_thresh)
+    mask_removed_area = adapter.and_(~mask_opened, mask)
+    fname = os.path.join(s["outdir"], f"{s['filename']}_areathresh.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask_removed_area))
+    # io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area))
 
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = mask_opened > 0
diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py
index 9900780..7ea4993 100644
--- a/histoqc/BlurDetectionModule.py
+++ b/histoqc/BlurDetectionModule.py
@@ -7,6 +7,7 @@
 from skimage.util import img_as_ubyte
 from skimage.color import rgb2gray
 import numpy as np
+from histoqc.array_adapter import ArrayAdapter, FUNC_MAP, ArrayDevice
 
 # Analysis of focus measure operators for shape-from-focus
 # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012
@@ -15,35 +16,41 @@
 
 def identifyBlurryRegions(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tidentifyBlurryRegions")
-
+    adapter = s.image_handle.adapter
     blur_radius = int(params.get("blur_radius", 7))
     blur_threshold = float(params.get("blur_threshold", .1))
-
     img = s.getImgThumb(params.get("image_work_size", "2.5x"))
-    img = rgb2gray(img)
-    img_laplace = np.abs(skimage.filters.laplace(img))
-    mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold
+    img = adapter(rgb2gray)(img)
+    # use the __abs__ interface
 
+    img_laplace = abs(adapter(skimage.filters.laplace)(img))
+    mask = adapter(skimage.filters.gaussian)(img_laplace, sigma=blur_radius) <= blur_threshold
     # for some reason resize takes a grayscale and produces a 3chan
-    mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, 1]
+    # Note: the reason you obtain a 3chan is that you specified a 3chan output shape
+    mask_resized_shape = s.getImgThumb(s["image_work_size"]).shape[:2]
+    mask = adapter(skimage.transform.resize)(mask, output_shape=mask_resized_shape, order=0)
 
-    mask = s["img_mask_use"] & (mask > 0)
+    mask = adapter.and_(s["img_mask_use"], mask > 0)
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_blurry.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask))
     s["img_mask_blurry"] = (mask * 255) > 0
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_blurry"]
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_blurry"])
+
+    labeled_mask = adapter(morphology.label)(mask)
+    rps = adapter(measure.regionprops)(labeled_mask)
 
-    rps = measure.regionprops(morphology.label(mask))
     if rps:
-        areas = np.asarray([rp.area for rp in rps])
+        # scalar stats --> CPU is sufficient.
+        # use float to cast cp.array(scalar) to python's float
+        areas = np.asarray([float(rp.area) for rp in rps])
         nobj = len(rps)
         area_max = areas.max()
         area_mean = areas.mean()
     else:
         nobj = area_max = area_mean = 0
-
     s.addToPrintList("blurry_removed_num_regions", str(nobj))
     s.addToPrintList("blurry_removed_mean_area", str(area_mean))
     s.addToPrintList("blurry_removed_max_area", str(area_max))
diff --git a/histoqc/BrightContrastModule.py b/histoqc/BrightContrastModule.py
index df98aec..a6e1ebe 100644
--- a/histoqc/BrightContrastModule.py
+++ b/histoqc/BrightContrastModule.py
@@ -4,9 +4,45 @@
 from skimage.color import convert_colorspace, rgb2gray
 from distutils.util import strtobool
 from histoqc.BaseImage import BaseImage
+from histoqc.array_adapter.typing import TYPE_ARRAY
+
+_EPS = np.finfo(np.float32).eps
+
+
+def _rms(img: TYPE_ARRAY) -> float:
+    """Root Mean Square contrast for non-empty masked images
+    Args:
+        img: Input image or a vector yielded from masked image. Not Tissue-less
+    Returns:
+
+    """
+    assert img.size > 0
+    err = (img - img.mean()) ** 2
+    return float(np.sqrt(err.sum() / img.size))
+
+
+def _michelson(img: TYPE_ARRAY) -> float:
+    """Helper function for non-empty masked input
+    Args:
+        img:
+    Returns:
+
+    """
+    assert img.size > 0
+    max_img = img.max()
+    min_img = img.min()
+    # add eps to avoid nan when max and min are both 0.
+    denominator = max_img + min_img
+    denominator = denominator if denominator != 0 else denominator + _EPS
+    return float((max_img - min_img) / denominator)
+
+
+def _tenengrad_from_sobel2(sobel_img2: TYPE_ARRAY):
+    return np.sqrt(sobel_img2.sum()) / sobel_img2.size
 
 
 def getBrightnessGray(s: BaseImage, params):
+    adapter = s.image_handle.adapter
     prefix = params.get("prefix", None)
     prefix = prefix+"_" if prefix else ""
     logging.info(f"{s['filename']} - \tgetContrast:{prefix}")
@@ -17,11 +53,11 @@ def getBrightnessGray(s: BaseImage, params):
 
     img = s.getImgThumb(s["image_work_size"])
 
-    img_g = rgb2gray(img)
+    img_g = adapter(rgb2gray)(img)
     if limit_to_mask:
 
         mask = s[mask_name] if not invert else ~s[mask_name]
-
+        mask = adapter.sync(mask)
         img_g = img_g[mask]
         if img_g.size == 0:
             img_g = np.array(-100)
@@ -33,6 +69,7 @@ def getBrightnessGray(s: BaseImage, params):
 
 
 def getBrightnessByChannelinColorSpace(s: BaseImage, params):
+    adapter = s.image_handle.adapter
     prefix = params.get("prefix", None)
     prefix = prefix + "_" if prefix else ""
 
@@ -48,7 +85,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params):
 
     suffix = ""
     if to_color_space != "RGB":
-        img = convert_colorspace(img, "RGB", to_color_space)
+        img = adapter(convert_colorspace)(img, fromspace="RGB", tospace=to_color_space)
         suffix = "_" + to_color_space
 
     for chan in range(0, 3):
@@ -56,6 +93,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params):
         if limit_to_mask:
 
             mask = s[mask_name] if not invert else ~s[mask_name]
+            mask = adapter.sync(mask)
             vals = vals[mask]
 
             if vals.size == 0:
@@ -68,6 +106,7 @@ def getBrightnessByChannelinColorSpace(s: BaseImage, params):
 
 
 def getContrast(s: BaseImage, params):
+    adapter = s.image_handle.adapter
     prefix = params.get("prefix", None)
     prefix = prefix + "_" if prefix else ""
 
@@ -78,15 +117,16 @@ def getContrast(s: BaseImage, params):
     invert = strtobool(params.get("invert", "False"))
 
     img = s.getImgThumb(s["image_work_size"])
-    img = rgb2gray(img)
+    img = adapter(rgb2gray)(img)
     # noinspection PyTypeChecker
-    sobel_img = sobel(img) ** 2
+    sobel_img2 = adapter(sobel)(img) ** 2
 
+    mask = None
     if limit_to_mask:
 
         mask = s[mask_name] if not invert else ~s[mask_name]
-
-        sobel_img = sobel_img[mask]
+        img, sobel_img2, mask = adapter.device_sync_all(img, sobel_img2, mask)
+        sobel_img2 = sobel_img2[mask]
         img = img[s["img_mask_use"]]
 
     if img.size == 0:  # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to
@@ -104,17 +144,18 @@ def getContrast(s: BaseImage, params):
         return
 
     # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask
-    tenen_grad_contrast = np.sqrt(np.sum(sobel_img)) / img.size
+
+    # np.sqrt(sobel_img2.sum()) / sobel_img2.size
+    # np.sqrt(np.sum(sobel_img)) / img.size
+    tenen_grad_contrast = _tenengrad_from_sobel2(sobel_img2=sobel_img2)
+
     s.addToPrintList(f"{prefix}tenen_grad_contrast", str(tenen_grad_contrast))
 
     # Michelson contrast
-    max_img = img.max()
-    min_img = img.min()
-    contrast = (max_img - min_img) / (max_img + min_img)
+    contrast = _michelson(img)
     s.addToPrintList(f"{prefix}michelson_contrast", str(contrast))
 
     # RMS contrast
-    rms_contrast = np.sqrt(pow(img - img.mean(), 2).sum() / img.size)
+    rms_contrast = _rms(img)
     s.addToPrintList(f"{prefix}rms_contrast", str(rms_contrast))
-
     return
diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py
index e5d2249..fc2bf75 100644
--- a/histoqc/BubbleRegionByRegion.py
+++ b/histoqc/BubbleRegionByRegion.py
@@ -18,15 +18,16 @@
 
 # WARNING: Not as robust as other modules
 def roiWise(s: BaseImage, params):
+    adapter = s.image_handle.adapter
     name = params.get("name", "classTask")
     print("\tpixelWise:\t", name, end="")
 
-    level = int(params.get("level", 1))
     win_size = int(params.get("win_size", 2048))  # the size of the ROI which will be iteratively considered
 
     osh = s.image_handle
 
     dim_base = osh.level_dimensions[0]
+    level = min(int(params.get("level", 1)), len(osh.level_dimensions) - 1)
     dims = osh.level_dimensions[level]
 
     ratio_x = dim_base[0] / dims[0]  # figure out the difference between desi
@@ -43,63 +44,63 @@ def roiWise(s: BaseImage, params):
         row_piece = []
         print('.', end='', flush=True)
         for y in range(0, dim_base[1], round(win_size * ratio_y)):
-            region = osh.read_region((x, y), 1, (win_size, win_size))
-            region = np.array(region)
-            region = region[:, :, 0:3]  # remove alpha channel
-            g = rgb2gray(region)
+            # todo: confirm -- the original level is hardcoded to be 1, shouldn't it be the level variable?
+
+            region = osh.region_backend((x, y), level, (win_size, win_size))
+            region = osh.backend_to_array(region)[..., :3]
+            g = adapter(rgb2gray)(region)
             # todo -- forward compatibility. Later version of frangi alters the signatures
-            feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges)
+            sigmas = frangi_scale_range + (frangi_scale_step,)
+            feat = adapter(frangi)(g, sigmas=sigmas, beta=frangi_beta1,
+                                   gamma=frangi_beta2, black_ridges=frangi_black_ridges)
+
             feat = feat / 8.875854409275627e-08
-            region_mask = np.bitwise_and(g < .3, feat > 5)
-            region_mask = remove_small_objects(region_mask, min_size=100)
-            # region_std = region.std(axis=2)
-            # region_gray = rgb2gray(region)
-            # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255)
-            # region_mask = scipy.ndimage.morphology.binary_dilation(region_mask, iterations=1)
-            # region_mask = resize(region_mask , (region_mask.shape[0] / 2, region_mask.shape[1] / 2))
+            region_mask = adapter.and_(g < .3, feat > 5)
+            region_mask = adapter(remove_small_objects)(region_mask, min_size=100)
             row_piece.append(region_mask)
+        # sanity check: force to synchronize the device
+        row_piece = adapter.device_sync_all(*row_piece)
         row_piece = np.concatenate(row_piece, axis=0)
-        mask.append(row_piece)
 
+        mask.append(row_piece)
+    mask = adapter.device_sync_all(*mask)
     mask = np.concatenate(mask, axis=1)
 
     if params.get("area_threshold", "") != "":
         # forward compatibility
         # inplace=True is equivalent to out=mask. Therefore, it is removed in future version
-        mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), out=mask)
+        mask = adapter(remove_small_objects)(mask, min_size=int(params.get("area_threshold", "")), out=mask)
 
     s.addToPrintList(name, str(mask.mean()))
 
-    # TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask
-    # s.addToPrintList(name,
-    #                  printMaskHelper(params.get("mask_statistics", s["mask_statistics"]),
-    #                                  prev_mask, s["img_mask_use"]))
-
-    # .astype(np.uint8) * 255)
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask))
-
+    fname = os.path.join(s["outdir"], f"{s['filename']}_BubbleBounds.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask))
     return
 
 
 def detectSmoothness(s: BaseImage, params):
+
+    adapter = s.image_handle.adapter
     logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness")
     thresh = float(params.get("threshold", ".01"))
     kernel_size = int(params.get("kernel_size", "10"))
     min_object_size = int(params.get("min_object_size", "100"))
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
+    img = adapter(color.rgb2gray)(img)
     avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2)
 
-    imf = convolve2d(img, avg, mode="same")
+    imf = adapter(convolve2d)(img, in2=avg, mode="same")
+
     mask_flat = abs(imf - img) < thresh
 
-    mask_flat = remove_small_objects(mask_flat, min_size=min_object_size)
-    mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size)
+    mask_flat = adapter(remove_small_objects)(mask_flat, min_size=min_object_size)
+    mask_flat = ~adapter(remove_small_objects)(~mask_flat, min_size=min_object_size)
 
     prev_mask = s["img_mask_use"]
     s["img_mask_flat"] = mask_flat
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_flat.png")
+    flat_out = adapter.and_(mask_flat, prev_mask)
+    adapter.imsave(fname, adapter(img_as_ubyte)(flat_out))
 
     s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"]
 
@@ -112,6 +113,5 @@ def detectSmoothness(s: BaseImage, params):
                         f"remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains "
                              f"detectable! Downstream modules likely to be incorrect/fail")
-
     return
 
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index fdbe039..b2cbb4a 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -2,7 +2,7 @@
 import os
 import re
 import sys
-
+from histoqc.array_adapter import ArrayAdapter, ArrayDevice
 from ast import literal_eval as make_tuple
 
 from distutils.util import strtobool
@@ -10,7 +10,7 @@
 from histoqc.BaseImage import printMaskHelper, BaseImage
 from skimage import io
 from skimage.util import img_as_ubyte, img_as_bool
-from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace
+from skimage.filters import gabor, frangi, gaussian, median, laplace
 from skimage.color import rgb2gray
 from skimage.morphology import remove_small_objects, disk, dilation
 from skimage.feature import local_binary_pattern
@@ -30,29 +30,35 @@ def pixelWise(s: BaseImage, params):
     thresh = float(params.get("threshold", .5))
 
     fname = params.get("tsv_file", "")
+
     if fname == "":
         logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name)
         sys.exit(1)
 
     model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1)
 
-    img = s.getImgThumb(s["image_work_size"])
+    # todo no formal support for GNB now
+    # todo  Possible solution: sklearn with array-api-compat and implement a wrapper into the ArrayAdaptor
+    # todo Also: need to rework the GaussianNB.fit interface into a wrapper.
+    device = s.image_handle.device
+    adapter = ArrayAdapter.build(input_device=device, output_device=device)
+    img = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU)
 
     gnb = GaussianNB()
     gnb.fit(model_vals[:, 1:], model_vals[:, 0])
-    cal = gnb.predict_proba(img.reshape(-1, 3))
+    cal = adapter(gnb.predict_proba)(img.reshape(-1, 3))
 
     cal = cal.reshape(img.shape[0], img.shape[1], 2)
     mask = cal[:, :, 1] > thresh
 
-    mask = s["img_mask_use"] & (mask > 0)
+    mask = adapter.and_(s["img_mask_use"], mask > 0)
 
     s.addToPrintList(name, str(mask.mean()))
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png")
+    adapter.imsave(fname, img_as_ubyte(mask))
     s["img_mask_" + name] = (mask * 255) > 0
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name]
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name])
 
     s.addToPrintList(name,
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -69,63 +75,79 @@ def pixelWise(s: BaseImage, params):
 # extract_patches_2d(image, patch_size, max_patches=None, random_state=None
 
 def compute_rgb(img, params):
-    return img
+    adapter = params["adapter"]
+    return adapter.sync(img)
 
 
 def compute_laplace(img, params):
     laplace_ksize = int(params.get("laplace_ksize", 3))
-    return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None]
+    adapter = params["adapter"]
+    img_gray = adapter(rgb2gray)(img)
+    #     return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None]
+    return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None]
+
 
 
 def compute_lbp(img, params):
     lbp_radius = float(params.get("lbp_radius", 3))
     lbp_points = int(params.get("lbp_points", 24))  # example sets radius * 8
     lbp_method = params.get("lbp_method", "default")
+    # todo: currently no LBP implemented
+    adapter: ArrayAdapter = params['adapter']
+    img_gray = adapter(rgb2gray)(img)
+    # return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None]
+    return adapter(local_binary_pattern)(img_gray, P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None]
 
-    return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None]
 
 
 def compute_gaussian(img, params):
+    adapter = params["adapter"]
     gaussian_sigma = int(params.get("gaussian_sigma", 1))
-    gaussian_multichan = strtobool(params.get("gaussian_multichan", False))
-
+    gaussian_multichan = strtobool(params.get("gaussian_multichan", "False"))
     # todo: forward compatibility
     # todo: after 0.19 default multichannel behavior is fixed and explicitly setting channel_axis is preferred.
     # todo: multichannel is also deprecated in later versions
     if gaussian_multichan:
-        return gaussian(img, sigma=gaussian_sigma, channel_axis=-1)
+        return adapter(gaussian)(img, sigma=gaussian_sigma, channel_axis=-1)
     else:
-        return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None]
+        img_gray = adapter(rgb2gray)(img)
+        return adapter(gaussian)(img_gray, sigma=gaussian_sigma)[:, :, None]
 
 
 def compute_median(img, params):
     median_disk_size = int(params.get("median_disk_size", 3))
     # starting from 0.19, selem is deprecated and footprint is preferred.
-    return median(rgb2gray(img), footprint=disk(median_disk_size))[:, :, None]
+    adapter: ArrayAdapter = params['adapter']
+    imgg = adapter(rgb2gray)(img)
+    footprint = adapter(disk)(median_disk_size)
+    return adapter(median)(imgg, footprint=footprint)[:, :, None]
 
 
 def compute_gabor(img, params):
-    if not params["shared_dict"].get("gabor_kernels", False):
-        gabor_theta = int(params.get("gabor_theta", 4))
-        gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)"))
-        gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)"))
-
-        kernels = []
-        for theta in range(gabor_theta):
-            theta = theta / 4. * np.pi
-            for sigma in gabor_sigma:
-                for frequency in gabor_frequency:
-                    kernel = np.real(gabor_kernel(frequency, theta=theta,
-                                                  sigma_x=sigma, sigma_y=sigma))
-                    kernels.append(kernel)
-        params["shared_dict"]["gabor_kernels"] = kernels
-
-    kernels = params["shared_dict"]["gabor_kernels"]
-    imgg = rgb2gray(img)
-    feats = np.zeros((imgg.shape[0], imgg.shape[1], len(kernels)), dtype=np.double)
-    for k, kernel in enumerate(kernels):
-        filtered = ndi.convolve(imgg, kernel, mode='wrap')
-        feats[:, :, k] = filtered
+    adapter = params["adapter"]
+    # if not params["shared_dict"].get("gabor_kernels", False):
+    # todo: the benefit of caching the gabor_kernel is marginal as the computational head to obtain the kernel itself
+    # todo: is neglectable
+    gabor_theta = int(params.get("gabor_theta", 4))
+    gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)"))
+    gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)"))
+
+    # kernels = []
+    fts = []
+    for theta in range(gabor_theta):
+        theta = theta / 4. * np.pi
+        for sigma in gabor_sigma:
+            for frequency in gabor_frequency:
+
+                fts.append((frequency, theta, sigma))
+
+    imgg = adapter(rgb2gray)(img)
+    feats = np.zeros((imgg.shape[0], imgg.shape[1], len(fts)), dtype=np.double)
+    feats = adapter.sync(feats)
+
+    for idx, (freq, tht, sig) in enumerate(fts):
+        filtered, _ = adapter(gabor)(imgg, theta=tht, sigma_x=sig, sigma_y=sig, frequency=freq, mode='wrap')
+        feats[:, :, idx] = filtered
     return feats
 
 
@@ -136,22 +158,31 @@ def compute_frangi(img, params):
     frangi_beta2 = float(params.get("frangi_beta2", 15))
     frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True"))
     sigmas = frangi_scale_range + (frangi_scale_step,)
-    feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2, black_ridges=frangi_black_ridges)
+
+    adapter: ArrayAdapter = params["adapter"]
+    img_gray = adapter(rgb2gray)(img)
+    feat = adapter(frangi)(img_gray, sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2,
+                           black_ridges=frangi_black_ridges)
+    # feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2,
+    #               black_ridges=frangi_black_ridges)
     return feat[:, :, None]  # add singleton dimension
 
 
 def compute_features(img, params):
     features = params.get("features", "")
-
+    adapter = params["adapter"]
     feats = []
     for feature in features.splitlines():
         func = getattr(sys.modules[__name__], f"compute_{feature}")
         feats.append(func(img, params))
-
+    feats = adapter.device_sync_all(*feats)
+    # cupy can be implicitly concatenated using np's API
     return np.concatenate(feats, axis=2)
 
 
 def byExampleWithFeatures(s: BaseImage, params):
+    device = s.image_handle.device
+    adapter = ArrayAdapter.build(input_device=device, output_device=device)
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
 
@@ -167,13 +198,16 @@ def byExampleWithFeatures(s: BaseImage, params):
         logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!")
         sys.exit(1)
 
+    adapter = s.image_handle.adapter
+    params['adapter'] = adapter
     with params["lock"]:  # this lock is shared across all threads such that only one thread needs to train the model
         # then it is shared with all other modules
         if not params["shared_dict"].get("model_" + name, False):
+
             logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}")
 
             model_vals = []
-            model_labels = np.empty([0, 1])
+            model_labels = adapter.sync(np.empty([0, 1]))
 
             for ex in params["examples"].splitlines():
                 ex = re.split(r'(?<!\W[A-Za-z]):(?!\\)', ex)  # workaround for windows: don't split on i.e. C:\
@@ -183,6 +217,7 @@ def byExampleWithFeatures(s: BaseImage, params):
 
                 # read mask as grayscale images
                 mask = io.imread(ex[1], as_gray=True)
+                mask = adapter.sync(mask)
                 # convert grayscale images into binary images if images are not binary format 
                 if mask.dtype.kind != 'b':
                     # warning log
@@ -190,8 +225,8 @@ def byExampleWithFeatures(s: BaseImage, params):
                     logging.warning(s['filename'] + ' - ' + msg)
                     s["warnings"].append(msg)
                     # convert to binary
-                    mask = img_as_bool(mask)
-                
+                    mask = adapter(img_as_bool)(mask)
+
                 mask = mask.reshape(-1, 1)
 
                 if nsamples_per_example != -1:  # sub sampling required
@@ -202,37 +237,47 @@ def byExampleWithFeatures(s: BaseImage, params):
                     mask = mask[idxkeep]
 
                 model_vals.append(eximg)
+                # again any component in vstack's input is cupy.ndarray will result in a cupy.ndarray
+                # but we explicitly sync the device of mask and model_labels anyway
                 model_labels = np.vstack((model_labels, mask))
 
             # do stuff here with model_vals
             model_vals = np.vstack(model_vals)
             clf = RandomForestClassifier(n_jobs=-1)
-            clf.fit(model_vals, model_labels.ravel())
+            # adapter(clf.fit)(model_vals, model_labels.ravel())
+            adapter(clf.fit)(model_vals, y=model_labels.ravel())
             params["shared_dict"]["model_" + name] = clf
             logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
 
     clf = params["shared_dict"]["model_" + name]
     img = s.getImgThumb(s["image_work_size"])
     feats = compute_features(img, params)
-    cal = clf.predict_proba(feats.reshape(-1, feats.shape[2]))
+    cal = adapter(clf.predict_proba)(feats.reshape(-1, feats.shape[2]))
     cal = cal.reshape(img.shape[0], img.shape[1], 2)
 
     mask = cal[:, :, 1] > thresh
     area_thresh = int(params.get("area_threshold", "5"))
     if area_thresh > 0:
         # inplace=True is redundant and deprecated.
-        mask = remove_small_objects(mask, min_size=area_thresh, out=mask)
+        mask = adapter(remove_small_objects)(mask, min_size=area_thresh, out=mask)
 
     dilate_kernel_size = int(params.get("dilate_kernel_size", "0"))
     if dilate_kernel_size > 0:
-        mask = dilation(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size)))
+        mask = adapter(dilation)(mask, footprint=np.ones((dilate_kernel_size, dilate_kernel_size)))
 
     mask = s["img_mask_use"] & (mask > 0)
+    # mask_ubyte = adapter.move_to_device(adapter(img_as_ubyte)(mask), device=ArrayDevice.CPU)
+    # io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", mask_ubyte)
+
+    fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask))
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask))
     s["img_mask_" + name] = (mask * 255) > 0
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name]
+
+    # todo: now the BaseImage will explicitly set/get img_* keys (except bbox) to the corresponding img handle device
+    # todo: however I think it could be better to also explicitly let the adapter to handle all binary operations.
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], ~s["img_mask_" + name])
 
     s.addToPrintList(name,
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -245,5 +290,4 @@ def byExampleWithFeatures(s: BaseImage, params):
 
     s["img_mask_force"].append("img_mask_" + name)
     s["completed"].append(f"byExampleWithFeatures:{name}")
-
     return
diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py
index bff440b..f3b4613 100644
--- a/histoqc/DeconvolutionModule.py
+++ b/histoqc/DeconvolutionModule.py
@@ -12,6 +12,7 @@
 
 def separateStains(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tseparateStains")
+    adapter = s.image_handle.adapter
     stain = params.get("stain", "")
     use_mask = strtobool(params.get("use_mask", "True"))
 
@@ -25,13 +26,15 @@ def separateStains(s: BaseImage, params):
         logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains")
         sys.exit(1)
 
-    mask = s["img_mask_use"]
+    adapter.sync(stain_matrix)
+    mask = adapter.sync(s["img_mask_use"])
 
     if use_mask and len(mask.nonzero()[0]) == 0:  # -- lets just error check at the top if mask is empty and abort early
         for c in range(3):
             s.addToPrintList(f"deconv_c{c}_std", str(-100))
             s.addToPrintList(f"deconv_c{c}_mean", str(-100))
-            io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape)))
+            fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png")
+            adapter.imsave(fname, img_as_ubyte(np.zeros(mask.shape)))
 
         logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue "
                         f"remains detectable! Saving Black images")
@@ -41,11 +44,10 @@ def separateStains(s: BaseImage, params):
         return
 
     img = s.getImgThumb(s["image_work_size"])
-    dimg = separate_stains(img, stain_matrix)
+    dimg = adapter(separate_stains)(img, conv_matrix=stain_matrix)
 
     for c in range(0, 3):
         dc = dimg[:, :, c]
-
         clip_max_val = np.quantile(dc.flatten(), .99)
         dc = np.clip(dc, a_min=0, a_max=clip_max_val)
 
@@ -65,6 +67,6 @@ def separateStains(s: BaseImage, params):
             s.addToPrintList(f"deconv_c{c}_std", str(dc.std()))
 
         dc = (dc - dc_min) / float(dc_max - dc_min) * mask
-        io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc))
-
+        fname = os.path.join(s["outdir"], f"{s['filename']}_deconv_c{c}.png")
+        adapter.imsave(fname, adapter(img_as_ubyte)(dc))
     return
diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py
index a57122f..901beeb 100644
--- a/histoqc/HistogramModule.py
+++ b/histoqc/HistogramModule.py
@@ -5,18 +5,27 @@
 import matplotlib.pyplot as plt
 from distutils.util import strtobool
 from histoqc.BaseImage import BaseImage
+from typing import Union
+from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.array_adapter.typing import TYPE_ARRAY
+# todo: beware that because there is no lock, it is likely that each worker will compute the template of their own.
 # this holds a local copy of the histograms of the template images so that they need only be computed once
 global_holder = {}
 
 
 def getHistogram(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetHistogram")
+    adapter = s.image_handle.adapter
     limit_to_mask = strtobool(params.get("limit_to_mask", True))
     bins = int(params.get("bins", 20))
 
     img = s.getImgThumb(s["image_work_size"])
+    tissue_mask = s["img_mask_use"]
+    # matplotlib --> pointless to use GPU here even if a corresponding API exists
+    img = adapter.move_to_device(img, ArrayDevice.CPU)
+    tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU)
     if limit_to_mask:
-        img = img[s["img_mask_use"]]
+        img = img[tissue_mask]
     else:
         img = img.reshape(-1, 3)
 
@@ -27,32 +36,38 @@ def getHistogram(s: BaseImage, params):
     ax.set_title('Color Distribution for ' + s["filename"])
     ax.set_xlabel('Pixel Val')
     ax.set_ylabel('Density')
-    plt.savefig(s["outdir"] + os.sep + s["filename"] + "_hist.png")
+    fname = os.path.join(s["outdir"], f"{s['filename']}_hist.png")
+    plt.savefig(fname)
     plt.close()
     return
 
 
-def computeHistogram(img, bins, mask=-1):
+def computeHistogram(img: TYPE_ARRAY, bins: int,
+                     adapter: ArrayAdapter, mask: Union[TYPE_ARRAY, int] = -1) -> TYPE_ARRAY:
     result = np.zeros(shape=(bins, 3))
+    img, mask = adapter.device_sync_all(img, mask)
+    result = adapter.sync(result)
     for chan in range(0, 3):
         vals = img[:, :, chan].flatten()
-        if isinstance(mask, np.ndarray):
+        if ArrayAdapter.is_array(mask):
+
             vals = vals[mask.flatten()]
 
         result[:, chan] = np.histogram(vals, bins=bins, density=True, range=(0, 255))[0]
-
     return result
 
 
 def compareToTemplates(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tcompareToTemplates")
+    adapter = s.image_handle.adapter
     bins = int(params.get("bins", 20))
     limit_to_mask = strtobool(params.get("limit_to_mask", True))
     # if the histograms haven't already been computed, compute and store them now
     if not global_holder.get("templates", False):
         templates = {}
         for template in params["templates"].splitlines():
-            templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins)
+            templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template),
+                                                                                          bins, adapter)
             # compute each of their histograms
         global_holder["templates"] = templates
 
@@ -70,11 +85,12 @@ def compareToTemplates(s: BaseImage, params):
             imghst = np.zeros((bins, 3))
 
         else:
-            imghst = computeHistogram(img, bins, mask)
+            imghst = computeHistogram(img, bins, adapter, mask)
     else:
-        imghst = computeHistogram(img, bins)
+        imghst = computeHistogram(img, bins, adapter)
 
     for template in global_holder["templates"]:
-        val = np.sum(pow(abs(global_holder["templates"][template] - imghst), 2))
+        hist_diff = adapter.sub(global_holder["templates"][template], imghst)
+        val = (abs(hist_diff) ** 2).sum()
         s.addToPrintList(template + "_MSE_hist", str(val))
     return
diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py
index cf3290a..4fe8a8d 100644
--- a/histoqc/LightDarkModule.py
+++ b/histoqc/LightDarkModule.py
@@ -3,6 +3,7 @@
 import numpy as np
 from histoqc.BaseImage import printMaskHelper, BaseImage
 from skimage import io, color
+from histoqc.array_adapter import ArrayAdapter, ArrayDevice
 from skimage.util import img_as_ubyte
 from distutils.util import strtobool
 from skimage.filters import threshold_otsu, rank
@@ -12,27 +13,28 @@
 
 
 def getIntensityThresholdOtsu(s: BaseImage, params):
+
     logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu")
+    adapter = s.image_handle.adapter
     name = params.get("name", "classTask")    
     local = strtobool(params.get("local", "False"))
     radius = int(params.get("radius", 15))
-    selem = disk(radius)
 
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
+    img = adapter(color.rgb2gray)(img)
 
     if local:
-        thresh = rank.otsu(img, selem)
+        thresh = adapter(rank.otsu)(img, footprint=disk(radius))
     else:
-        thresh = threshold_otsu(img)
+        thresh = adapter(threshold_otsu)(img)
 
     region_below_thresh = img < thresh
 
     s["img_mask_" + name] = region_below_thresh > 0
     if strtobool(params.get("invert", "False")):
         s["img_mask_" + name] = ~s["img_mask_" + name]
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(s["img_mask_" + name]))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_" + name]))
 
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name]
@@ -50,6 +52,8 @@ def getIntensityThresholdOtsu(s: BaseImage, params):
 
 
 def getIntensityThresholdPercent(s: BaseImage, params):
+
+    adapter = s.image_handle.adapter
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}")
     lower_thresh = float(params.get("lower_threshold", "-inf"))
@@ -66,9 +70,8 @@ def getIntensityThresholdPercent(s: BaseImage, params):
 
     map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std)
 
-    img = color.rgb2gray(img)
+    img = adapter(color.rgb2gray)(img)
     region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh)
-
     region_between_interval = np.bitwise_and(region_between_interval, map_std)
 
     s["img_mask_" + name] = region_between_interval > 0
@@ -77,10 +80,11 @@ def getIntensityThresholdPercent(s: BaseImage, params):
         s["img_mask_" + name] = ~s["img_mask_" + name]
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name]
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png",
-              img_as_ubyte(prev_mask & ~s["img_mask_" + name]))
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_" + name])
+    fname = os.path.join(s["outdir"], f"{s['filename']}_{name}.png")
+    mask_out = adapter.and_(prev_mask, ~s["img_mask_" + name])
+    mask_out = adapter(img_as_ubyte)(mask_out)
+    adapter.imsave(fname, mask_out)
 
     s.addToPrintList(name,
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -95,6 +99,7 @@ def getIntensityThresholdPercent(s: BaseImage, params):
 
 
 def removeBrightestPixels(s: BaseImage, params):
+    adapter = s.image_handle.adapter
     logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels")
 
     # lower_thresh = float(params.get("lower_threshold", -float("inf")))
@@ -104,9 +109,10 @@ def removeBrightestPixels(s: BaseImage, params):
     # upper_var = float(params.get("upper_variance", float("inf")))
 
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
+    img = adapter(color.rgb2gray)(img)
 
-    kmeans = KMeans(n_clusters=3,  n_init=1).fit(img.reshape([-1, 1]))
+    kmc = KMeans(n_clusters=3,  n_init=1)
+    kmeans = adapter(kmc.fit)(img.reshape([-1, 1]))
     # noinspection PyUnresolvedReferences
     brightest_cluster = np.argmax(kmeans.cluster_centers_)
     # noinspection PyUnresolvedReferences
@@ -118,9 +124,11 @@ def removeBrightestPixels(s: BaseImage, params):
         s["img_mask_bright"] = ~s["img_mask_bright"]
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"]
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"]))
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"])
+    fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png.png")
+    bright_out = adapter.and_(prev_mask, ~s["img_mask_bright"])
+    bright_out = adapter(img_as_ubyte)(bright_out)
+    adapter.imsave(fname, bright_out)
 
     s.addToPrintList("brightestPixels",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -136,24 +144,26 @@ def removeBrightestPixels(s: BaseImage, params):
 
 def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering")
+    adapter = s.image_handle.adapter
     disk_size = int(params.get("disk_size", 10000))
     threshold = int(params.get("upper_threshold", 200))
 
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
-    img = (img * 255).astype(np.uint8)
-    selem = disk(disk_size)
+    img = adapter(color.rgb2gray)(img)
+    img = adapter(img_as_ubyte)(img)
+    # note - for uint type, CPU's rank.minimum is >>> faster than GPU's erosion
+    imgfilt = adapter(rank.minimum)(img, footprint=disk(disk_size))
 
-    imgfilt = rank.minimum(img, selem)
     s["img_mask_bright"] = imgfilt > threshold
 
     if strtobool(params.get("invert", "True")):
         s["img_mask_bright"] = ~s["img_mask_bright"]
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"]
-
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"]))
+    s["img_mask_use"] = adapter.and_(s["img_mask_use"], s["img_mask_bright"])
+    fname = os.path.join(s["outdir"], f"{s['filename']}_bright.png")
+    mask_out = adapter.and_(prev_mask, ~s["img_mask_bright"])
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask_out))
 
     s.addToPrintList("brightestPixels",
                      printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
@@ -169,11 +179,12 @@ def minimumPixelIntensityNeighborhoodFiltering(s: BaseImage, params):
 
 def saveEqualisedImage(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage")
-
+    adapter = s.image_handle.adapter
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
-
-    out = exposure.equalize_hist((img*255).astype(np.uint8))
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", img_as_ubyte(out))
+    img = adapter(color.rgb2gray)(img)
+    img_u8 = adapter(img_as_ubyte)(img)
+    out = adapter(exposure.equalize_hist)(img_u8)
+    out_u8 = adapter.move_to_device(adapter(img_as_ubyte)(out), ArrayDevice.CPU)
+    io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", out_u8)
 
     return
diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py
index 610141c..2e28762 100644
--- a/histoqc/LocalTextureEstimationModule.py
+++ b/histoqc/LocalTextureEstimationModule.py
@@ -3,12 +3,14 @@
 from skimage import color
 from distutils.util import strtobool
 from skimage.feature import graycomatrix, graycoprops
+from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.BaseImage import BaseImage
 
 
-def estimateGreyComatrixFeatures(s, params):
+def estimateGreyComatrixFeatures(s: BaseImage, params):
     prefix = params.get("prefix", None)
     prefix = prefix+"_" if prefix else ""
-
+    adapter = s.image_handle.adapter
     logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}")
     patch_size = int(params.get("patch_size", 32))
     npatches = int(params.get("npatches", 100))
@@ -18,7 +20,7 @@ def estimateGreyComatrixFeatures(s, params):
     mask_name = params.get("mask_name", "img_mask_use")
 
     img = s.getImgThumb(s["image_work_size"])
-    img = color.rgb2gray(img)
+    img = adapter(color.rgb2gray)(img)
 
     mask = s[mask_name] if not invert else ~s[mask_name]
     if len(mask.nonzero()[0]) == 0:  # add warning in case the no tissus detected in mask
@@ -29,20 +31,23 @@ def estimateGreyComatrixFeatures(s, params):
         return
 
     maskidx = mask.nonzero()
-    maskidx = np.asarray(maskidx).transpose()
+    maskidx = ArrayAdapter.new_array(maskidx, array_device=ArrayDevice.CPU).transpose()
     idx = np.random.choice(maskidx.shape[0], npatches)
 
     results = []
-
     for index in idx:
         r, c = maskidx[index, :]
+
         patch = img[r:r + patch_size, c:c + patch_size]
-        glcm = graycomatrix(np.digitize(patch, np.linspace(0, 1, num=nlevels), right=True), distances=[5],
-                            angles=[0], levels=nlevels, symmetric=True, normed=True)
 
-        results.append([graycoprops(glcm, prop=feat) for feat in feats])
+        image = adapter(np.digitize)(patch, bins=np.linspace(0, 1, num=nlevels), right=True)
+        glcm = adapter(graycomatrix)(image, distances=[5],
+                                     angles=[0], levels=nlevels, symmetric=True, normed=True)
+        haralick_feats = [adapter(graycoprops)(glcm, prop=feat) for feat in feats]
+        haralick_feats = adapter.device_sync_all(*haralick_feats)
+        results.append(haralick_feats)
 
-    results = np.asarray(results).squeeze()
+    results = adapter.asarray(results).squeeze()
 
     for vals, feat in zip(results.transpose(), feats):
         s.addToPrintList(f"{prefix}{feat}", str(vals.mean()))
diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py
index d660cfe..4d2c301 100644
--- a/histoqc/MorphologyModule.py
+++ b/histoqc/MorphologyModule.py
@@ -1,28 +1,32 @@
 import logging
 import os
 import numpy as np
-from histoqc.BaseImage import printMaskHelper
+from histoqc.BaseImage import printMaskHelper, BaseImage
+from histoqc.array_adapter import ArrayAdapter
+from histoqc.array_adapter.typing import TYPE_ARRAY
 from skimage import io, morphology, measure
 from skimage.util import img_as_ubyte
 from scipy import ndimage as ndi
 from typing import cast
 
 
-def removeSmallObjects(s, params):
+def removeSmallObjects(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tremoveSmallObjects")
+    adapter = s.image_handle.adapter
     min_size = int(params.get("min_size", 64))
-    img_reduced = morphology.remove_small_objects(s["img_mask_use"], min_size=min_size)
-    img_small = np.invert(img_reduced) & s["img_mask_use"]
+    img_reduced = adapter(morphology.remove_small_objects)(s["img_mask_use"], min_size=min_size)
+    img_small = adapter.and_(~img_reduced, s["img_mask_use"])
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_remove.png", img_as_ubyte(img_small))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_small_remove.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(img_small))
     s["img_mask_small_filled"] = (img_small * 255) > 0
 
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = img_reduced
-
-    rps = measure.regionprops(morphology.label(img_small))
+    label_small = adapter(morphology.label)(img_small)
+    rps = adapter(measure.regionprops)(label_small)
     if rps:
-        areas = np.asarray([rp.area for rp in rps])
+        areas = np.asarray([float(rp.area) for rp in rps])
         nobj = len(rps)
         area_max = areas.max()
         area_mean = areas.mean()
@@ -41,17 +45,17 @@ def removeSmallObjects(s, params):
                         f"remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(f"After MorphologyModule.removeSmallObjects: NO tissue remains "
                              f"detectable! Downstream modules likely to be incorrect/fail")
-
     return
 
 
-def remove_large_objects(img, max_size):
+def remove_large_objects(img: TYPE_ARRAY, max_size: int, adapter: ArrayAdapter):
     # code taken from morphology.remove_small_holes, except switched < with >
-    selem = ndi.generate_binary_structure(img.ndim, 1)
-    ccs = np.zeros_like(img, dtype=np.int32)
-    ndi.label(img, selem, output=ccs)
-    component_sizes: np.ndarray = np.bincount(ccs.ravel())
-    too_big: np.ndarray = cast(np.ndarray, component_sizes > max_size)
+    # selem = adapter(ndi.generate_binary_structure)(img.ndim, connectivity=1)
+    # equivalent to ndi.label in binary case.
+    img = adapter.sync(img)
+    ccs = adapter(measure.label)(img, connectivity=1)
+    component_sizes: TYPE_ARRAY = adapter.sync(np.bincount(ccs.ravel()))
+    too_big: TYPE_ARRAY = component_sizes > max_size
     too_big_mask = too_big[ccs]
     img_out = img.copy()
     img_out[too_big_mask] = 0
@@ -60,34 +64,37 @@ def remove_large_objects(img, max_size):
 
 def removeFatlikeTissue(s, params):
     logging.info(f"{s['filename']} - \tremoveFatlikeTissue")
+    adapter = s.image_handle.adapter
     fat_cell_size = int(params.get("fat_cell_size", 64))
     kernel_size = int(params.get("kernel_size", 3))
     max_keep_size = int(params.get("max_keep_size", 1000))
 
-    img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=fat_cell_size)
-    img_small = img_reduced & np.invert(s["img_mask_use"])
-    img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9)
+    img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=fat_cell_size)
+    img_small = adapter.and_(img_reduced, ~s["img_mask_use"])
+    img_small = ~adapter(morphology.remove_small_holes)(~img_small, area_threshold=9)
+    # binary
+    mask_dilate = adapter(morphology.dilation)(img_small, footprint=np.ones((kernel_size, kernel_size)))
 
-    mask_dilate = morphology.dilation(img_small, footprint=np.ones((kernel_size, kernel_size)))
-    mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size)
+    mask_dilate_removed = remove_large_objects(mask_dilate, max_size=max_keep_size, adapter=adapter)
 
-    mask_fat = mask_dilate & ~mask_dilate_removed
+    mask_fat = adapter.and_(mask_dilate, ~mask_dilate_removed)
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_fatlike.png", img_as_ubyte(mask_fat))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_fatlike.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask_fat))
     s["img_mask_fatlike"] = (mask_fat * 255) > 0
 
     prev_mask = s["img_mask_use"]
-    s["img_mask_use"] = prev_mask & ~mask_fat
+    s["img_mask_use"] = adapter.and_(prev_mask, ~mask_fat)
 
-    rps = measure.regionprops(morphology.label(mask_fat))
+    label_fat = adapter(morphology.label)(mask_fat)
+    rps = adapter(measure.regionprops)(label_fat)
     if rps:
-        areas = np.asarray([rp.area for rp in rps])
+        areas = np.asarray([float(rp.area) for rp in rps])
         nobj = len(rps)
         area_max = areas.max()
         area_mean = areas.mean()
     else:
         nobj = area_max = area_mean = 0
-
     s.addToPrintList("fatlike_tissue_removed_num_regions", str(nobj))
     s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean))
     s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max))
@@ -100,23 +107,26 @@ def removeFatlikeTissue(s, params):
                         f"remains detectable! Downstream modules likely to be incorrect/fail")
         s["warnings"].append(f"After MorphologyModule.removeFatlikeTissue: NO tissue remains "
                              f"detectable! Downstream modules likely to be incorrect/fail")
-
+    return
 
 def fillSmallHoles(s, params):
     logging.info(f"{s['filename']} - \tfillSmallHoles")
+    adapter = s.image_handle.adapter
     min_size = int(params.get("min_size", 64))
-    img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size)
-    img_small = img_reduced & np.invert(s["img_mask_use"])
+    img_reduced = adapter(morphology.remove_small_holes)(s["img_mask_use"], area_threshold=min_size)
+    img_small = adapter.and_(img_reduced, np.invert(s["img_mask_use"]))
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_small_fill.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(img_small))
     s["img_mask_small_removed"] = (img_small * 255) > 0
 
     prev_mask = s["img_mask_use"]
     s["img_mask_use"] = img_reduced
 
-    rps = measure.regionprops(morphology.label(img_small))
+    label_small = adapter(morphology.label)(img_small)
+    rps = adapter(measure.regionprops)(label_small)
     if rps:
-        areas = np.asarray([rp.area for rp in rps])
+        areas = np.asarray([float(rp.area) for rp in rps])
         nobj = len(rps)
         area_max = areas.max()
         area_mean = areas.mean()
diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py
index cd04be5..b1451f4 100644
--- a/histoqc/SaveModule.py
+++ b/histoqc/SaveModule.py
@@ -1,38 +1,43 @@
 import logging
 import os
-from skimage import io
 from skimage.util import img_as_ubyte
 from distutils.util import strtobool
 from skimage import color
 import numpy as np
 from histoqc.BaseImage import BaseImage
+from histoqc.array_adapter.typing import TYPE_ARRAY
+from histoqc.array_adapter import ArrayAdapter
 
 
-def blend2Images(img, mask):
+def blend2Images(img: TYPE_ARRAY, mask: TYPE_ARRAY, adapter: ArrayAdapter):
     if img.ndim == 3:
-        img = color.rgb2gray(img)
+        img = adapter(color.rgb2gray)(img)
     if mask.ndim == 3:
-        mask = color.rgb2gray(mask)
+        mask = adapter(color.rgb2gray)(mask)
     img = img[:, :, None] * 1.0  # can't use boolean
     mask = mask[:, :, None] * 1.0
+    # explicitly sync again to satisfy the requirement of using np.concatenate as a unified concatenate func
+    img, mask = adapter.device_sync_all(img, mask)
     out = np.concatenate((mask, img, mask), 2)
     return out
 
 
 def saveFinalMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tsaveUsableRegion")
-
+    adapter = s.image_handle.adapter
     mask = s["img_mask_use"]
     for mask_force in s["img_mask_force"]:
+        mask, s[mask_force] = adapter.device_sync_all(mask, s[mask_force])
         mask[s[mask_force]] = 0
 
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask))
+    fname = os.path.join(s["outdir"], f"{s['filename']}_mask_use.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(mask))
 
     if strtobool(params.get("use_mask", "True")):  # should we create and save the fusion mask?
         img = s.getImgThumb(s["image_work_size"])
-        out = blend2Images(img, mask)
-        io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out))
-
+        out = blend2Images(img, mask, adapter)
+        fname = os.path.join(s["outdir"], f"{s['filename']}_fuse.png")
+        adapter.imsave(fname, adapter(img_as_ubyte)(out))
     return
 
 
@@ -45,7 +50,7 @@ def saveAssociatedImage(s: BaseImage, key: str, dim: int):
         logging.warning(message)
         s["warnings"].append(message)
         return
-    
+
     # get asscociated image by key
     associated_img = image_handle.associated_images[key]
     width, height = image_handle.__class__.backend_dim(associated_img)
@@ -63,7 +68,9 @@ def saveAssociatedImage(s: BaseImage, key: str, dim: int):
     # resize the pil (RGB)
     associated_img = associated_img.resize(size).convert("RGB")
     # save the pil
-    associated_img.save(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png")
+    fname = os.path.join(s["outdir"], f"{s['filename']}_{key}.png")
+    associated_img.save(fname)
+    return
 
 
 def saveMacro(s, params):
@@ -72,10 +79,10 @@ def saveMacro(s, params):
     return
 
 
-def saveMask(s, params):
+def saveMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tsaveMaskUse")
     suffix = params.get("suffix", None)
-    
+    adapter = s.image_handle.adapter
     # check suffix param
     if not suffix:
         msg = f"{s['filename']} - \tPlease set the suffix for mask use."
@@ -83,15 +90,21 @@ def saveMask(s, params):
         return
 
     # save mask
-    io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"]))
+    fname = os.path.join(s['outdir'], f"{s['filename']}_{suffix}.png")
+    adapter.imsave(fname, adapter(img_as_ubyte)(s["img_mask_use"]))
+
+    return
 
 
-def saveThumbnails(s, params):
+def saveThumbnails(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tsaveThumbnail")
     # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one
     img = s.getImgThumb(params.get("image_work_size", "1.25x"))
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img)
+    adapter = s.image_handle.adapter
+    fname_thumb = os.path.join(s["outdir"], f"{s['filename']}_thumb.png")
+    adapter.imsave(fname_thumb, adapter(img_as_ubyte)(img))
 
     img = s.getImgThumb(params.get("small_dim", 500))
-    io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img)
+    fname_small = os.path.join(s["outdir"], f"{s['filename']}_thumb_small.png")
+    adapter.imsave(fname_small, adapter(img_as_ubyte)(img))
     return
diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py
index 07bf184..65d0383 100644
--- a/histoqc/TileExtractionModule.py
+++ b/histoqc/TileExtractionModule.py
@@ -6,6 +6,7 @@
 import os
 import json
 from histoqc.BaseImage import BaseImage
+from histoqc.array_adapter import ArrayDevice
 from typing import Callable, Dict, Any, List, Tuple, Union
 import numpy as np
 from PIL import Image, ImageDraw
@@ -15,10 +16,7 @@
 import logging
 from typing_extensions import Literal, get_args
 from PIL.Image import Image as PILImage
-# from histoqc.import_wrapper.helper import dynamic_import
-# __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]]
-# Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions")
-# get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions")
+
 
 TYPE_TILE_SIZE = Literal['tile_size']
 TYPE_TILE_STRIDE = Literal['tile_stride']
@@ -554,7 +552,10 @@ def valid_tile_extraction(self,
 
 
 def extract(s: BaseImage, params: Dict[PARAMS, Any]):
+
     logging.info(f"{s['filename']} - \textract")
+
+    adapter = s.image_handle.adapter
     with params['lock']:
         slide_out = s['outdir']
         tile_output_dir = params.get('tile_output', os.path.join(slide_out, 'tiles'))
@@ -568,9 +569,9 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]):
         tile_size = int(params.get('tile_size', 256))
         tile_stride = int(params.get('tile_stride', 256))
         tissue_thresh = float(params.get('tissue_ratio', 0.5))
-
-        img_use_for_tiles = s.getImgThumb(s["image_work_size"])
-        mask_use_for_tiles = s['img_mask_use']
+        # no added value from GPU acceleration (except for read_region) as the procedure is sequential
+        img_use_for_tiles = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU)
+        mask_use_for_tiles = adapter.move_to_device(s['img_mask_use'], ArrayDevice.CPU)
         image_handle = s.image_handle
         img_w, img_h = image_handle.dimensions
 
diff --git a/histoqc/__main__.py b/histoqc/__main__.py
index 415f479..430d33f 100644
--- a/histoqc/__main__.py
+++ b/histoqc/__main__.py
@@ -22,6 +22,7 @@
 from histoqc._worker import worker_error
 from histoqc.config import read_config_template
 from histoqc.data import managed_pkg_data
+from histoqc.wsi_handles.constants import KEY_CUCIM
 
 
 @managed_pkg_data
@@ -66,7 +67,8 @@ def main(argv=None):
     args = parser.parse_args(argv)
 
     # --- multiprocessing and logging setup -----------------------------------
-
+    # todo: move config parsing above the mpm initialization and set the start method accordingly
+    # multiprocessing.set_start_method("spawn")
     setup_logging(capture_warnings=True, filter_warnings='ignore')
     mpm = multiprocessing.Manager()
     lm = MultiProcessingLogManager('histoqc', manager=mpm)
@@ -83,6 +85,12 @@ def main(argv=None):
         lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.")
         config.read_string(read_config_template(args.config))
 
+    # todo: for cuda --> must use spawn method if CUDA is enabled.
+    # todo: however a better memory management scheme should be tested.
+    # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime
+    # todo: at this moment we simply override the args.nprocesses
+    if config["BaseImage.BaseImage"].get("handles") == KEY_CUCIM:
+        args.nprocesses = 0
     # --- provide models, pen and templates as fallbacks from package data ----
 
     managed_pkg_data.inject_pkg_data_fallback(config)
diff --git a/histoqc/_worker.py b/histoqc/_worker.py
index 54af16d..ecf4e6a 100644
--- a/histoqc/_worker.py
+++ b/histoqc/_worker.py
@@ -1,7 +1,7 @@
 """histoqc worker functions"""
 import os
 import shutil
-
+import traceback
 from histoqc.BaseImage import BaseImage
 from histoqc._pipeline import load_pipeline
 from histoqc._pipeline import setup_plotting_backend
@@ -37,21 +37,19 @@ def worker(idx, file_name, *,
     log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")
 
     try:
-        s = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")))
-
+        s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")))
         for process, process_params in process_queue:
             process_params["lock"] = lock
             process_params["shared_dict"] = shared_dict
             process(s, process_params)
             s["completed"].append(process.__name__)
-
     except Exception as exc:
         # reproduce histoqc error string
         _oneline_doc_str = exc.__doc__.replace('\n', '')
         err_str = f"{exc.__class__} {_oneline_doc_str} {exc}"
-
+        trace_string = traceback.format_exc()
         log_manager.logger.error(
-            f"{file_name} - Error analyzing file (skipping): \t {err_str}"
+            f"{file_name} - Error analyzing file (skipping): \t {err_str}. Traceback: {trace_string}"
         )
         if exc.__traceback__.tb_next is not None:
             func_tb_obj = str(exc.__traceback__.tb_next.tb_frame.f_code)
@@ -64,7 +62,9 @@ def worker(idx, file_name, *,
     else:
         # So long as the gc is triggered to delete the handle, the close is called to release the resources,
         # as documented in the openslide and cuimage's source code.
-        s.image_handle = None
+        # todo: should simply handle the __del__
+        s.image_handle.close()
+        # s.image_handle.handle = None
         return s
 
 
diff --git a/histoqc/array_adapter/__init__.py b/histoqc/array_adapter/__init__.py
new file mode 100644
index 0000000..c43f502
--- /dev/null
+++ b/histoqc/array_adapter/__init__.py
@@ -0,0 +1,2 @@
+from .func_mapping import FUNC_MAP
+from .adapter import ArrayAdapter, ArrayDevice
diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py
new file mode 100644
index 0000000..17c46e3
--- /dev/null
+++ b/histoqc/array_adapter/adapter.py
@@ -0,0 +1,316 @@
+from __future__ import annotations
+from histoqc.array_adapter.typing import TYPE_NP, TYPE_CP, TYPE_ARRAY
+from histoqc.array_adapter.func_mapping import FUNC_MAP
+import numpy as np
+from numbers import Number
+from typing import Callable, Mapping, Tuple, Optional, Any, Iterable
+from typing_extensions import Self, TypeGuard
+from histoqc.import_wrapper.cupy_extra import cupy as cp
+from enum import Enum
+import logging
+import functools
+from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv
+import skimage
+
+
+def cupy_installed() -> bool:
+    try:
+        import cupy
+        return True
+    except ImportError:
+        return False
+
+
+class ArrayDevice(Enum):
+    CPU: str = 'cpu'
+    CUDA: str = 'cuda'
+
+    @classmethod
+    def from_bool(cls, on_cpu: bool):
+        value = 'cpu' if on_cpu else 'cuda'
+        return cls(value)
+
+    @classmethod
+    def from_str(cls, str_val: str):
+        assert isinstance(str_val, str)
+        return cls(str_val)
+
+    @classmethod
+    def build(cls, value: str | Self):
+        if isinstance(value, cls):
+            return value
+        if isinstance(value, str):
+            return cls.from_str(value)
+        raise TypeError(f'Unexpected type {type(value)}')
+
+
+class ArrayAdapter(Callable):
+
+    func_map: Mapping[Callable, Callable]
+    input_device: Optional[str | ArrayDevice]
+    output_device: Optional[str | ArrayDevice]
+    contingent_type: ArrayDevice
+
+    TYPE_CONTINGENT_DEFAULT: ArrayDevice = ArrayDevice.CUDA
+
+    @staticmethod
+    def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]:
+        return isinstance(arr, np.ndarray)
+
+    @staticmethod
+    def is_cupy(arr: TYPE_CP) -> TypeGuard[TYPE_CP]:
+        return cupy_installed() and isinstance(arr, cp.ndarray)
+
+    @staticmethod
+    def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP:
+        if ArrayAdapter.is_numpy(arr) or isinstance(arr, Number):
+            return np.array(arr, copy=copy)
+        assert ArrayAdapter.is_cupy(arr)
+        return arr.get()
+
+    @staticmethod
+    def to_cupy(arr: TYPE_ARRAY | Number, copy: bool = False) -> TYPE_CP:
+        assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \
+            f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}"
+        return cp.array(arr, copy=copy)
+
+    @staticmethod
+    def array_device_type(arr: TYPE_ARRAY) -> ArrayDevice:
+        on_cpu = isinstance(arr, np.ndarray)
+        return ArrayDevice.from_bool(on_cpu)
+
+    @staticmethod
+    def is_array(arr: TYPE_ARRAY) -> bool:
+        return isinstance(arr, np.ndarray) or (cupy_installed() and isinstance(arr, cp.ndarray))
+
+    @classmethod
+    def new_array(cls, arr: Any, array_device: ArrayDevice) -> TYPE_ARRAY:
+        if cls.is_array(arr):
+            return cls.move_to_device(arr, array_device, copy=False)
+        if isinstance(arr, Iterable):
+            arr = cls.curate_array_device(*arr, device=array_device, copy=False)
+        if array_device is array_device.CUDA:
+            assert cupy_installed()
+            return cp.asarray(arr)
+        elif array_device is array_device.CPU:
+            return np.asarray(arr)
+        raise TypeError(f'Unexpected device {ArrayDevice}')
+
+    def asarray(self, arr: Any) -> TYPE_ARRAY:
+        return self.__class__.new_array(arr, self.output_device)
+
+    @classmethod
+    def move_to_device(cls,
+                       arr: Optional[TYPE_ARRAY],
+                       device: Optional[ArrayDevice], copy: bool = False) -> TYPE_ARRAY:
+        # structural match > py3.10
+        if device is None or not cls.is_array(arr):
+            return arr
+        if device is ArrayDevice.CPU:
+            return ArrayAdapter.to_numpy(arr, copy=copy)
+        elif device is ArrayDevice.CUDA:
+            return ArrayAdapter.to_cupy(arr, copy=copy)
+        raise ValueError(f"Unsupported device: {device}")
+
+    def sync(self, arr: Optional[TYPE_ARRAY | Number], copy: bool = False) -> TYPE_ARRAY:
+        if not self.__class__.is_array(arr):
+            return arr
+        return self.__class__.move_to_device(arr, device=self.output_device, copy=copy)
+
+    @classmethod
+    def curate_device_helper(cls, output: TYPE_ARRAY, device: Optional[ArrayDevice], copy: bool):
+        if output is not None and cls.is_array(output):
+            output = cls.move_to_device(output, device, copy=copy)
+        return output
+
+    @classmethod
+    def curate_array_device(cls, *arrays: TYPE_ARRAY,
+                            device: Optional[ArrayDevice], copy: bool) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        # already an array - no need to recursively unpack
+        if cls.is_array(arrays):
+            return cls.curate_device_helper(arrays, device, copy)
+        # not array and not iterable --> unpack
+        if not isinstance(arrays, Iterable):
+            return arrays
+        # only one input
+        if len(arrays) == 1:
+            return cls.curate_device_helper(arrays[0], device=device, copy=copy)
+        out_list = []
+        for o in arrays:
+            out_list.append(cls.curate_device_helper(o, device=device, copy=copy))
+        return tuple(out_list)
+
+    @staticmethod
+    def get_api(cpu_func: Callable,
+                func_map: Mapping[Callable, Callable], device_type: ArrayDevice) -> Tuple[Callable, ArrayDevice]:
+        if device_type == ArrayDevice.CPU:
+            return cpu_func, ArrayDevice.CPU
+        mapped = func_map.get(cpu_func, None)
+        if mapped is not None:
+            return mapped, ArrayDevice.CUDA
+        # if not implemented
+        func_name = getattr(cpu_func, '__qualname__', cpu_func.__name__)
+        logging.info(f"{__name__}: {func_name} does not have a GPU implementation. Revert to CPU")
+        return cpu_func, ArrayDevice.CPU
+
+    @classmethod
+    def unified_call(cls,
+                     cpu_func: Callable,
+                     func_map: Mapping[Callable, Callable],
+                     input_device: Optional[str | ArrayDevice],
+                     output_device: Optional[str | ArrayDevice],
+                     data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
+        # use input_device to override the current device, if not None
+        data = cls.curate_array_device(data, device=input_device, copy=False)
+        input_type = cls.array_device_type(data)
+        # attempt to fetch the op, revert to CPU if GPU impl is not available
+        func, func_device = cls.get_api(cpu_func, func_map, input_type)
+        func_in = cls.curate_array_device(data, device=func_device, copy=False)
+
+        curated_args = cls.curate_array_device(*args, device=func_device, copy=False)
+        curated_kwargs = dict()
+
+        for k, v in kwargs.items():
+            curated_kwargs[k] = cls.curate_array_device(v, device=func_device, copy=False)
+
+        output = func(func_in, *curated_args, **curated_kwargs)
+        # only move the output around if the output is an array
+        if isinstance(output, tuple):
+            return cls.curate_array_device(*output, device=output_device, copy=False)
+        return cls.curate_array_device(output, device=output_device, copy=False)
+
+    @classmethod
+    def _validate_device(cls, device_type: Optional[ArrayDevice]) -> Optional[ArrayDevice]:
+        if device_type is None:
+            return None
+        if device_type is ArrayDevice.CPU:
+            return device_type
+        assert device_type is ArrayDevice.CUDA, f"Unsupported device_type: {device_type}"
+        if not cupy_installed():
+            logging.info(f"Cupy is not installed. Revert to CPU")
+            return ArrayDevice.CPU
+        return device_type
+
+    def __init__(self,
+                 input_device: Optional[str | ArrayDevice],
+                 output_device: Optional[str | ArrayDevice],
+                 func_map: Mapping[Callable, Callable],
+                 contingent_type: ArrayDevice,
+                 ):
+        self.input_device = self.__class__._validate_device(input_device)
+        self.output_device = self.__class__._validate_device(output_device)
+        self.func_map = func_map
+        self.contingent_type = contingent_type
+
+    @classmethod
+    def build(cls,
+              input_device: Optional[str | ArrayDevice],
+              output_device: Optional[str | ArrayDevice],
+              func_map: Mapping[Callable, Callable] = FUNC_MAP,
+              contingent_type: ArrayDevice = TYPE_CONTINGENT_DEFAULT):
+        return cls(input_device=input_device, output_device=output_device, func_map=func_map,
+                   contingent_type=contingent_type)
+
+    def apply(self, /, cpu_func: Callable, data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
+        return self.unified_call(cpu_func, self.func_map, self.input_device, self.output_device,
+                                 data, *args, **kwargs)
+
+    def __call__(self, cpu_func: Callable) -> Callable:
+        return functools.partial(self.apply, cpu_func)
+
+    @staticmethod
+    def __sync_device_output_helper(*arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        assert len(arrays) > 0
+        if len(arrays) == 1:
+            return arrays[0]
+        return arrays
+
+    @classmethod
+    def device_sync_all_helper(cls, *arrays: TYPE_ARRAY,
+                               array_device: Optional[ArrayDevice],
+                               contingent_type: Optional[ArrayDevice]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        assert isinstance(arrays, tuple), f"input check. {type(arrays)} is not a tuple"
+        if array_device is not None:
+            return cls.__sync_device_output_helper(tuple(cls.move_to_device(arr, array_device) for arr in arrays))
+        assert array_device is None
+        has_contingent_device = any(cls.array_device_type(arr) is contingent_type for arr in arrays)
+        if has_contingent_device:
+            assert contingent_type is not None
+            return cls.__sync_device_output_helper(cls.device_sync_all_helper(*arrays,
+                                                                              array_device=contingent_type,
+                                                                              contingent_type=None))
+        return cls.__sync_device_output_helper(arrays)
+
+    def device_sync_all(self, *arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        return self.__class__.device_sync_all_helper(*arrays, array_device=self.output_device,
+                                                     contingent_type=self.contingent_type)
+
+    @classmethod
+    def binary_operation(cls, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY,
+                         input_device: Optional[ArrayDevice],
+                         output_device: Optional[ArrayDevice],
+                         contingent_type: Optional[ArrayDevice],
+                         op: Callable) -> TYPE_ARRAY:
+        arr1, arr2 = cls.device_sync_all_helper(arr1, arr2, array_device=input_device,
+                                                contingent_type=contingent_type)
+        result: TYPE_ARRAY = op(arr1, arr2)
+        return cls.curate_array_device(result, device=output_device, copy=False)
+
+    def and_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=and_)
+
+    def or_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=or_)
+
+    def add(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=add)
+
+    def sub(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=sub)
+
+    def mul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=mul)
+
+    def matmul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=matmul)
+
+    def truediv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=truediv)
+
+    def floordiv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=floordiv)
+
+    def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
+        return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
+                                               output_device=self.output_device,
+                                               contingent_type=self.contingent_type,
+                                               op=xor)
+
+    @classmethod
+    def imsave(cls, filename: str, arr: TYPE_ARRAY):
+        arr = cls.curate_array_device(arr, device=ArrayDevice.CPU, copy=False)
+        return skimage.io.imsave(filename, arr)
diff --git a/histoqc/array_adapter/array_api_compat.py b/histoqc/array_adapter/array_api_compat.py
new file mode 100644
index 0000000..4527a6e
--- /dev/null
+++ b/histoqc/array_adapter/array_api_compat.py
@@ -0,0 +1,4 @@
+try:
+    import array_api_compat
+except ImportError:
+    ...
diff --git a/histoqc/array_adapter/func_mapping.py b/histoqc/array_adapter/func_mapping.py
new file mode 100644
index 0000000..aa55b46
--- /dev/null
+++ b/histoqc/array_adapter/func_mapping.py
@@ -0,0 +1,51 @@
+import scipy.ndimage
+import skimage
+from typing import Callable, Mapping
+from scipy import ndimage as sci_ndi
+import numpy as np
+try:
+    import cupy as cp
+    from scipy import signal as sci_signal
+    from cupyx.scipy import signal as cu_signal
+    from cucim import skimage as cu_skimage
+    from cupyx.scipy import ndimage as cu_ndi
+
+    FUNC_MAP: Mapping[Callable, Callable] = {
+        skimage.color.convert_colorspace: cu_skimage.color.convert_colorspace,
+        skimage.color.rgb2gray: cu_skimage.color.rgb2gray,
+        skimage.color.separate_stains: cu_skimage.color.separate_stains,
+        skimage.exposure.equalize_hist: cu_skimage.exposure.equalize_hist,
+        # not implemented
+        # skimage.feature.graycomatrix: cu_skimage.feature.graycomatrix,
+        # skimage.feature.local_binary_pattern: cu_skimage.feature.local_binary_pattern,
+        skimage.filters.frangi: cu_skimage.filters.frangi,
+        skimage.filters.gaussian: cu_skimage.filters.gaussian,
+        skimage.filters.gabor_kernel: cu_skimage.filters.gabor_kernel,
+        skimage.filters.gabor: cu_skimage.filters.gabor,
+        skimage.filters.laplace: cu_skimage.filters.laplace,
+        skimage.filters.median: cu_skimage.filters.median,
+
+        # skimage.filters.rank.otsu: cu_skimage.filters.rank.otsu,
+        skimage.filters.sobel: cu_skimage.filters.sobel,
+        skimage.filters.threshold_otsu: cu_skimage.filters.threshold_otsu,
+        skimage.measure.regionprops: cu_skimage.measure.regionprops,
+        skimage.morphology.binary_opening: cu_skimage.morphology.binary_opening,
+        # the morphology.label is just an alias of the measure.label
+        skimage.morphology.label: cu_skimage.measure.label,
+        skimage.morphology.dilation: cu_skimage.morphology.dilation,
+        skimage.morphology.disk: cu_skimage.morphology.disk,
+        skimage.morphology.remove_small_holes: cu_skimage.morphology.remove_small_holes,
+        skimage.morphology.remove_small_objects: cu_skimage.morphology.remove_small_objects,
+        skimage.transform.resize: cu_skimage.transform.resize,
+        skimage.util.img_as_bool: cu_skimage.util.img_as_bool,
+        skimage.util.img_as_ubyte: cu_skimage.util.img_as_ubyte,
+        sci_ndi.convolve: cu_ndi.convolve,
+
+        # can be replaced by erosion, but is actually slower for uint dtypes
+        skimage.filters.rank.minimum: None,  # cu_skimage.morphology.erosion,
+        sci_signal.convolve2d: cu_signal.convolve2d,
+        sci_ndi.generate_binary_structure: cu_ndi.generate_binary_structure,
+        np.digitize: cp.digitize
+    }
+except ImportError:
+    FUNC_MAP = dict()
diff --git a/histoqc/array_adapter/implementation.py b/histoqc/array_adapter/implementation.py
new file mode 100644
index 0000000..e69de29
diff --git a/histoqc/array_adapter/typing.py b/histoqc/array_adapter/typing.py
new file mode 100644
index 0000000..900ceac
--- /dev/null
+++ b/histoqc/array_adapter/typing.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+import numpy as np
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # for forward reference only -- annotate the objects from the optional cupy dependency
+    # noinspection PyUnresolvedReferences
+    import cupy as cp
+
+
+TYPE_NP = np.ndarray
+TYPE_CP = "cp.ndarray"
+TYPE_ARRAY = "np.ndarray | cp.ndarray"
diff --git a/histoqc/config/config.ini b/histoqc/config/config.ini
index 0ee89bd..86872de 100644
--- a/histoqc/config/config.ini
+++ b/histoqc/config/config.ini
@@ -26,6 +26,7 @@ steps= BasicModule.getBasicStats
 
 [BaseImage.BaseImage]
 image_work_size = 1.25x
+handles = openslide
 
 #not yet implemented
 confirm_base_mag: False
diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py
new file mode 100644
index 0000000..c67d3e1
--- /dev/null
+++ b/histoqc/import_wrapper/cupy_extra.py
@@ -0,0 +1,6 @@
+try:
+    import cupy
+except ImportError:
+    cupy = None
+finally:
+    cp = cupy
diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py
index c5b5713..9a65a71 100644
--- a/histoqc/wsi_handles/base.py
+++ b/histoqc/wsi_handles/base.py
@@ -1,10 +1,13 @@
+from __future__ import annotations
 from abc import ABC, abstractmethod
+
 from histoqc.import_wrapper import dynamic_import
 import logging
 from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic
 import numpy as np
 from PIL.Image import Image as PILImage
 from typing_extensions import final
+from histoqc.array_adapter import ArrayDevice, ArrayAdapter
 import os
 
 from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER
@@ -18,6 +21,7 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]):
 
     handle: T
     fname: str
+    _adapter: ArrayAdapter
 
     @staticmethod
     def curate_shorter_edge(width, height, limit, aspect_ratio):
@@ -314,6 +318,7 @@ def __create_handle(cls, fname: str,
             # noinspection PyBroadException
             try:
                 image_handle = handle_class(fname)
+                break
             except Exception:
                 # current wsi handle class doesn't support this file
                 msg = f"WSIImageHandle: \"{handle_class}\" doesn't support {fname}"
@@ -337,4 +342,24 @@ def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle":
 
     def __init__(self, fname: str):
         self.fname = fname
+        self._adapter = ArrayAdapter.build(input_device=self.device, output_device=self.device)
+
+    @abstractmethod
+    def close_handle(self):
+        ...
+
+    def close(self):
+        self.close_handle()
+        self.handle = None
 
+    def is_closed(self):
+        return not hasattr(self, "handle") or self.handle is None
+
+    @property
+    @abstractmethod
+    def device(self) -> ArrayDevice:
+        raise NotImplementedError
+
+    @property
+    def adapter(self) -> ArrayAdapter:
+        return self._adapter
diff --git a/histoqc/wsi_handles/constants.py b/histoqc/wsi_handles/constants.py
index 16a297b..9fcec88 100644
--- a/histoqc/wsi_handles/constants.py
+++ b/histoqc/wsi_handles/constants.py
@@ -14,7 +14,7 @@
 WSI_HANDLES: Dict[str, Tuple[str, str]] = {
     KEY_OPENSLIDE: (MODULE_OPENSLIDE, CLASS_OPENSLIDE),
     # todo: add unified interface
-    # KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM),
+    KEY_CUCIM: (MODULE_CUCIM, CLASS_CUCIM),
 }
 
 HANDLE_DELIMITER = ','
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
index e37172a..c4df848 100644
--- a/histoqc/wsi_handles/cuimage_handle.py
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -10,6 +10,7 @@
 from lazy_property import LazyProperty
 import numpy as np
 from cucim import skimage as c_skimage
+from histoqc.array_adapter import ArrayDevice
 
 
 class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
@@ -114,7 +115,9 @@ def get_thumbnail(self, new_dim):
         aspect_ratio = self.dimensions[0] / self.dimensions[1]
 
         target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
-        return c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w))
+        resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w))
+
+        return c_skimage.util.img_as_ubyte(resized)
 
     def get_best_level_for_downsample(self, down_factor: float) -> int:
         """Return the largest level that's smaller than the target downsample factor, consistent with openslide.
@@ -164,8 +167,20 @@ def grid_stack(grid: List[List[cp.ndarray]]):
 
     @staticmethod
     def backend_dim(region: CuImage) -> Tuple[int, int]:
-        return cast(Tuple[int, int], tuple(region.size()[:2]))
+        return cast(Tuple[int, int], tuple(region.size()[:2][::-1]))
 
     @staticmethod
     def array_shape(arr: cp.ndarray) -> Tuple[int, ...]:
         return arr.shape
+
+    def close_handle(self):
+        if hasattr(self, "handle") and self.handle is not None:
+            self.handle.close()
+            self.handle = None
+        if self.dummy_handle is not None:
+            self.dummy_handle.close()
+            self.dummy_handle = None
+
+    @property
+    def device(self) -> ArrayDevice:
+        return ArrayDevice.CUDA
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
index 6388329..c426cf2 100644
--- a/histoqc/wsi_handles/openslide_handle.py
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -8,6 +8,7 @@
 from PIL.Image import Image as PILImage
 from .utils import rgba2rgb_pil
 from PIL import Image
+from histoqc.array_adapter import ArrayDevice
 
 
 class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]):
@@ -143,3 +144,12 @@ def backend_dim(region: PILImage) -> Tuple[int, int]:
     @staticmethod
     def array_shape(arr: np.ndarray) -> Tuple[int, ...]:
         return arr.shape
+
+    def close_handle(self):
+        if hasattr(self, "handle"):
+            self.handle.close()
+        self.handle = None
+
+    @property
+    def device(self) -> ArrayDevice:
+        return ArrayDevice.CPU
diff --git a/imported_functions_list.txt b/imported_functions_list.txt
new file mode 100644
index 0000000..e69de29

From e35e1bac2316d5a80a8725405d1892ff4ca071ba Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Thu, 2 May 2024 10:40:14 -0400
Subject: [PATCH 3/6] edge case wherein no comment is available from WSI

---
 histoqc/BasicModule.py                | 3 ++-
 histoqc/ClassificationModule.py       | 2 --
 histoqc/wsi_handles/cuimage_handle.py | 8 ++++++++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py
index fa90ff7..dfc59a7 100644
--- a/histoqc/BasicModule.py
+++ b/histoqc/BasicModule.py
@@ -17,7 +17,8 @@ def getBasicStats(s: BaseImage, params):
     s.addToPrintList("width", osh.dimensions[0] if len(osh.dimensions) >= 2 else "NA")
     s.addToPrintList("mpp_x", osh.mpp_x)
     s.addToPrintList("mpp_y", osh.mpp_y)
-    s.addToPrintList("comment", osh.comment.replace("\n", " ").replace("\r", " "))
+    comment = osh.comment if osh.comment else ""
+    s.addToPrintList("comment", comment.replace("\n", " ").replace("\r", " "))
     return
 
 
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index b2cbb4a..1cfa732 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -181,8 +181,6 @@ def compute_features(img, params):
 
 
 def byExampleWithFeatures(s: BaseImage, params):
-    device = s.image_handle.device
-    adapter = ArrayAdapter.build(input_device=device, output_device=device)
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
 
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
index c4df848..df23ee3 100644
--- a/histoqc/wsi_handles/cuimage_handle.py
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -11,6 +11,7 @@
 import numpy as np
 from cucim import skimage as c_skimage
 from histoqc.array_adapter import ArrayDevice
+import gc
 
 
 class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
@@ -173,13 +174,20 @@ def backend_dim(region: CuImage) -> Tuple[int, int]:
     def array_shape(arr: cp.ndarray) -> Tuple[int, ...]:
         return arr.shape
 
+    def release(self):
+        cp.get_default_memory_pool().free_all_blocks()
+        cp.get_default_pinned_memory_pool().free_all_blocks()
+
     def close_handle(self):
         if hasattr(self, "handle") and self.handle is not None:
             self.handle.close()
+            del self.handle
             self.handle = None
+            gc.collect()
         if self.dummy_handle is not None:
             self.dummy_handle.close()
             self.dummy_handle = None
+        self.release()
 
     @property
     def device(self) -> ArrayDevice:

From 39fa170eafed8e3e2846b7da1037ffc0b4f34795 Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Sun, 5 May 2024 20:06:20 -0400
Subject: [PATCH 4/6] simple mp support for GPU

---
 histoqc/AnnotationModule.py             |   6 +-
 histoqc/BaseImage.py                    |  41 +++-
 histoqc/BasicModule.py                  |   2 -
 histoqc/BlurDetectionModule.py          |   3 +-
 histoqc/BubbleRegionByRegion.py         |   5 +-
 histoqc/ClassificationModule.py         |  17 +-
 histoqc/DeconvolutionModule.py          |   1 -
 histoqc/HistogramModule.py              |   9 +-
 histoqc/LightDarkModule.py              |   5 +-
 histoqc/LocalTextureEstimationModule.py |   4 +-
 histoqc/MorphologyModule.py             |   5 +-
 histoqc/TileExtractionModule.py         |   8 +-
 histoqc/__main__.py                     |  87 +++++--
 histoqc/_worker.py                      |  38 ++-
 histoqc/array_adapter/__init__.py       |   2 +-
 histoqc/array_adapter/adapter.py        | 314 +++++++++++++++++-------
 histoqc/array_adapter/func_mapping.py   |  11 +-
 histoqc/import_wrapper/cupy_extra.py    |   2 +
 histoqc/wsi_handles/base.py             |  51 ++--
 histoqc/wsi_handles/cuimage_handle.py   | 119 ++++++---
 histoqc/wsi_handles/openslide_handle.py |  38 ++-
 21 files changed, 538 insertions(+), 230 deletions(-)

diff --git a/histoqc/AnnotationModule.py b/histoqc/AnnotationModule.py
index b36a7fc..f5e1be8 100644
--- a/histoqc/AnnotationModule.py
+++ b/histoqc/AnnotationModule.py
@@ -1,7 +1,7 @@
 import logging
 from typing import List, Tuple
 from histoqc.BaseImage import printMaskHelper, BaseImage
-from histoqc.array_adapter import ArrayDevice, ArrayAdapter
+from histoqc.array_adapter import ArrayAdapter, Device
 from skimage import io
 from skimage.util import img_as_ubyte
 import os
@@ -78,7 +78,9 @@ def getParams(s: BaseImage, params):
 def saveAnnotationMask(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetAnnotationMask")
     # quite pointless to enforce GPU acceleration here. Force to use CPU mode
-    adaptor = ArrayAdapter.build(input_device=ArrayDevice.CPU, output_device=ArrayDevice.CPU)
+    adaptor = ArrayAdapter.build(input_device=Device.build(Device.DEVICE_CPU),
+                                 output_device=Device.build(Device.DEVICE_CPU),
+                                 contingent_device=Device.build(Device.DEVICE_CPU))
 
     (ann_format, file_path, suffix) = getParams(s, params)
 
diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py
index aa507b5..97fe60c 100644
--- a/histoqc/BaseImage.py
+++ b/histoqc/BaseImage.py
@@ -1,15 +1,19 @@
 from __future__ import annotations
 import logging
 import os
+import sys
+
 import numpy as np
 import zlib
+import traceback
 import dill
 from distutils.util import strtobool
 import re
 from typing import Union, Tuple, cast, Optional
 from histoqc.wsi_handles.base import WSIImageHandle
-from histoqc.wsi_handles.constants import KEY_OPENSLIDE, KEY_CUCIM
+from histoqc.wsi_handles.constants import KEY_CUCIM
 from histoqc.array_adapter.typing import TYPE_ARRAY
+from histoqc.array_adapter import ArrayDeviceType
 _REGEX_MAG = r"^(\d?\.?\d*X?)"
 _PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE)
 MAG_NA = None
@@ -36,22 +40,33 @@
 
 class BaseImage(dict):
 
-    _image_handle: WSIImageHandle
+    _image_handle: Optional[WSIImageHandle]
+    _device_id: Optional[int]
 
     @property
-    def image_handle(self) -> WSIImageHandle:
-        return self._image_handle
+    def image_handle(self) -> Optional[WSIImageHandle]:
+        if hasattr(self, "_image_handle"):
+            return self._image_handle
+        return None
 
     @image_handle.setter
     def image_handle(self, image_handle: WSIImageHandle):
         self._image_handle = image_handle
 
-    def __init__(self, fname, fname_outdir, params):
+    def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None):
         dict.__init__(self)
+        # init
+        self._device_id = device_id
+        self._image_handle = None
         handles = params.get("handles", KEY_CUCIM)
-        # dynamically load wsi image handle
-        self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles)
 
+        # dynamically load wsi image handle
+        try:
+            self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles, device_id=device_id)
+        except Exception:
+            trace_string = traceback.format_exc()
+            logging.error(f"{__name__}: {fname} -- Error Creating Handle - Traceback: {trace_string}")
+            sys.exit(1)
         self.in_memory_compression = strtobool(params.get("in_memory_compression", "False"))
 
         self["warnings"] = ['']  # this needs to be first key in case anything else wants to add to it
@@ -100,11 +115,13 @@ def __init__(self, fname, fname_outdir, params):
     def is_img_data(key: str) -> bool:
         return key.startswith("img") and key != "img_bbox"
 
-    def _sync_to_handle(self, key, value):
+    def _sync_to_handle(self, key, value, device: Optional[ArrayDeviceType] = None):
         if not self.__class__.is_img_data(key):
             return value
         if hasattr(self, "_image_handle") and self.image_handle is not None:
-            value = self.image_handle.adapter.sync(value)
+            device = device if device is not None else self.image_handle.device
+            value = self.image_handle.adapter.__class__.curate_arrays_device(value,
+                                                                             device=device, copy=False)
         return value
 
     def __getitem__(self, key):
@@ -149,7 +166,7 @@ def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool
         relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in osh.level_downsamples]
         level = np.where(relative_down_factors_idx)[0]
         if level.size:
-            return level[0], True
+            return cast(int, level[0]), True
         else:
             return osh.get_best_level_for_downsample(downsample_factor), False
 
@@ -234,9 +251,11 @@ def getImgThumb(self, size: str) -> Optional[TYPE_ARRAY]:
                 logging.info(
                     f"{self['filename']} - \t\tloading image from level {target_level} of size"
                     f" {image_handle.level_dimensions[target_level]}")
+                # PILLOW
                 tile = image_handle.read_region((bx, by), target_level, size)
+
                 self[key] = (np.asarray(self.image_handle.backend_rgba2rgb(tile))
-                             if np.shape(tile)[-1] == 4
+                             if len(tile.getbands()) == 4
                              else np.asarray(tile))
 
                 # specifies a desired size of thumbnail
diff --git a/histoqc/BasicModule.py b/histoqc/BasicModule.py
index dfc59a7..94cfe40 100644
--- a/histoqc/BasicModule.py
+++ b/histoqc/BasicModule.py
@@ -1,9 +1,7 @@
 import logging
 import os
 from histoqc.BaseImage import printMaskHelper
-from histoqc.array_adapter import ArrayAdapter, ArrayDevice
 from skimage.morphology import remove_small_objects, binary_opening, disk
-from skimage import io
 from skimage.util import img_as_ubyte
 from histoqc.BaseImage import BaseImage
 
diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py
index 7ea4993..176a132 100644
--- a/histoqc/BlurDetectionModule.py
+++ b/histoqc/BlurDetectionModule.py
@@ -3,11 +3,10 @@
 
 import skimage
 from histoqc.BaseImage import printMaskHelper, BaseImage
-from skimage import io, morphology, measure
+from skimage import morphology, measure
 from skimage.util import img_as_ubyte
 from skimage.color import rgb2gray
 import numpy as np
-from histoqc.array_adapter import ArrayAdapter, FUNC_MAP, ArrayDevice
 
 # Analysis of focus measure operators for shape-from-focus
 # Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012
diff --git a/histoqc/BubbleRegionByRegion.py b/histoqc/BubbleRegionByRegion.py
index fc2bf75..da253c5 100644
--- a/histoqc/BubbleRegionByRegion.py
+++ b/histoqc/BubbleRegionByRegion.py
@@ -9,7 +9,7 @@
 from skimage.color import rgb2gray
 from skimage.morphology import remove_small_objects
 from histoqc.BaseImage import BaseImage
-from skimage import io, color
+from skimage import color
 import numpy as np
 
 
@@ -47,7 +47,7 @@ def roiWise(s: BaseImage, params):
             # todo: confirm -- the original level is hardcoded to be 1, shouldn't it be the level variable?
 
             region = osh.region_backend((x, y), level, (win_size, win_size))
-            region = osh.backend_to_array(region)[..., :3]
+            region = osh.backend_to_array(region, osh.device)[..., :3]
             g = adapter(rgb2gray)(region)
             # todo -- forward compatibility. Later version of frangi alters the signatures
             sigmas = frangi_scale_range + (frangi_scale_step,)
@@ -114,4 +114,3 @@ def detectSmoothness(s: BaseImage, params):
         s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains "
                              f"detectable! Downstream modules likely to be incorrect/fail")
     return
-
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index 1cfa732..6c7f81f 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -2,7 +2,7 @@
 import os
 import re
 import sys
-from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.array_adapter import ArrayAdapter, Device
 from ast import literal_eval as make_tuple
 
 from distutils.util import strtobool
@@ -15,8 +15,6 @@
 from skimage.morphology import remove_small_objects, disk, dilation
 from skimage.feature import local_binary_pattern
 
-from scipy import ndimage as ndi
-
 from sklearn.naive_bayes import GaussianNB
 from sklearn.ensemble import RandomForestClassifier
 
@@ -40,12 +38,13 @@ def pixelWise(s: BaseImage, params):
     # todo no formal support for GNB now
     # todo  Possible solution: sklearn with array-api-compat and implement a wrapper into the ArrayAdaptor
     # todo Also: need to rework the GaussianNB.fit interface into a wrapper.
-    device = s.image_handle.device
-    adapter = ArrayAdapter.build(input_device=device, output_device=device)
-    img = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU)
+    device = s.image_handle.device_type
+    adapter: ArrayAdapter = s.image_handle.adapter
+    img = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]),
+                                       device=Device.build(Device.DEVICE_CPU))
 
     gnb = GaussianNB()
-    gnb.fit(model_vals[:, 1:], model_vals[:, 0])
+    adapter(gnb.fit)(model_vals[:, 1:], model_vals[:, 0])
     cal = adapter(gnb.predict_proba)(img.reshape(-1, 3))
 
     cal = cal.reshape(img.shape[0], img.shape[1], 2)
@@ -87,7 +86,6 @@ def compute_laplace(img, params):
     return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None]
 
 
-
 def compute_lbp(img, params):
     lbp_radius = float(params.get("lbp_radius", 3))
     lbp_points = int(params.get("lbp_points", 24))  # example sets radius * 8
@@ -99,7 +97,6 @@ def compute_lbp(img, params):
     return adapter(local_binary_pattern)(img_gray, P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None]
 
 
-
 def compute_gaussian(img, params):
     adapter = params["adapter"]
     gaussian_sigma = int(params.get("gaussian_sigma", 1))
@@ -242,8 +239,8 @@ def byExampleWithFeatures(s: BaseImage, params):
             # do stuff here with model_vals
             model_vals = np.vstack(model_vals)
             clf = RandomForestClassifier(n_jobs=-1)
-            # adapter(clf.fit)(model_vals, model_labels.ravel())
             adapter(clf.fit)(model_vals, y=model_labels.ravel())
+            # clf.fit(model_vals, y=model_labels.ravel())
             params["shared_dict"]["model_" + name] = clf
             logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
 
diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py
index f3b4613..91a943e 100644
--- a/histoqc/DeconvolutionModule.py
+++ b/histoqc/DeconvolutionModule.py
@@ -2,7 +2,6 @@
 import os
 import sys
 import numpy as np
-from skimage import io
 from skimage.util import img_as_ubyte
 from histoqc.BaseImage import BaseImage
 from skimage.color import separate_stains
diff --git a/histoqc/HistogramModule.py b/histoqc/HistogramModule.py
index 901beeb..0dd8a21 100644
--- a/histoqc/HistogramModule.py
+++ b/histoqc/HistogramModule.py
@@ -6,7 +6,7 @@
 from distutils.util import strtobool
 from histoqc.BaseImage import BaseImage
 from typing import Union
-from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.array_adapter import ArrayAdapter, Device
 from histoqc.array_adapter.typing import TYPE_ARRAY
 # todo: beware that because there is no lock, it is likely that each worker will compute the template of their own.
 # this holds a local copy of the histograms of the template images so that they need only be computed once
@@ -15,15 +15,16 @@
 
 def getHistogram(s: BaseImage, params):
     logging.info(f"{s['filename']} - \tgetHistogram")
-    adapter = s.image_handle.adapter
+    # adapter = s.image_handle.adapter
     limit_to_mask = strtobool(params.get("limit_to_mask", True))
     bins = int(params.get("bins", 20))
 
     img = s.getImgThumb(s["image_work_size"])
     tissue_mask = s["img_mask_use"]
     # matplotlib --> pointless to use GPU here even if a corresponding API exists
-    img = adapter.move_to_device(img, ArrayDevice.CPU)
-    tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU)
+    img, tissue_mask = ArrayAdapter.curate_arrays_device(img, tissue_mask,
+                                                         device=Device.build(Device.DEVICE_CPU))
+    # tissue_mask = adapter.move_to_device(tissue_mask, ArrayDevice.CPU)
     if limit_to_mask:
         img = img[tissue_mask]
     else:
diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py
index 4fe8a8d..2312089 100644
--- a/histoqc/LightDarkModule.py
+++ b/histoqc/LightDarkModule.py
@@ -3,7 +3,7 @@
 import numpy as np
 from histoqc.BaseImage import printMaskHelper, BaseImage
 from skimage import io, color
-from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.array_adapter import ArrayAdapter, Device
 from skimage.util import img_as_ubyte
 from distutils.util import strtobool
 from skimage.filters import threshold_otsu, rank
@@ -184,7 +184,8 @@ def saveEqualisedImage(s: BaseImage, params):
     img = adapter(color.rgb2gray)(img)
     img_u8 = adapter(img_as_ubyte)(img)
     out = adapter(exposure.equalize_hist)(img_u8)
-    out_u8 = adapter.move_to_device(adapter(img_as_ubyte)(out), ArrayDevice.CPU)
+    out_u8 = ArrayAdapter.curate_arrays_device(adapter(img_as_ubyte)(out),
+                                               device=Device.build(Device.DEVICE_CPU))
     io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", out_u8)
 
     return
diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py
index 2e28762..d7babd3 100644
--- a/histoqc/LocalTextureEstimationModule.py
+++ b/histoqc/LocalTextureEstimationModule.py
@@ -3,7 +3,7 @@
 from skimage import color
 from distutils.util import strtobool
 from skimage.feature import graycomatrix, graycoprops
-from histoqc.array_adapter import ArrayAdapter, ArrayDevice
+from histoqc.array_adapter import ArrayAdapter, Device
 from histoqc.BaseImage import BaseImage
 
 
@@ -31,7 +31,7 @@ def estimateGreyComatrixFeatures(s: BaseImage, params):
         return
 
     maskidx = mask.nonzero()
-    maskidx = ArrayAdapter.new_array(maskidx, array_device=ArrayDevice.CPU).transpose()
+    maskidx = ArrayAdapter.new_array(maskidx, array_device=Device.build(Device.DEVICE_CPU)).transpose()
     idx = np.random.choice(maskidx.shape[0], npatches)
 
     results = []
diff --git a/histoqc/MorphologyModule.py b/histoqc/MorphologyModule.py
index 4d2c301..426a358 100644
--- a/histoqc/MorphologyModule.py
+++ b/histoqc/MorphologyModule.py
@@ -4,10 +4,8 @@
 from histoqc.BaseImage import printMaskHelper, BaseImage
 from histoqc.array_adapter import ArrayAdapter
 from histoqc.array_adapter.typing import TYPE_ARRAY
-from skimage import io, morphology, measure
+from skimage import morphology, measure
 from skimage.util import img_as_ubyte
-from scipy import ndimage as ndi
-from typing import cast
 
 
 def removeSmallObjects(s: BaseImage, params):
@@ -109,6 +107,7 @@ def removeFatlikeTissue(s, params):
                              f"detectable! Downstream modules likely to be incorrect/fail")
     return
 
+
 def fillSmallHoles(s, params):
     logging.info(f"{s['filename']} - \tfillSmallHoles")
     adapter = s.image_handle.adapter
diff --git a/histoqc/TileExtractionModule.py b/histoqc/TileExtractionModule.py
index 65d0383..e3cf69e 100644
--- a/histoqc/TileExtractionModule.py
+++ b/histoqc/TileExtractionModule.py
@@ -6,7 +6,7 @@
 import os
 import json
 from histoqc.BaseImage import BaseImage
-from histoqc.array_adapter import ArrayDevice
+from histoqc.array_adapter import Device
 from typing import Callable, Dict, Any, List, Tuple, Union
 import numpy as np
 from PIL import Image, ImageDraw
@@ -36,6 +36,7 @@
 TYPE_BBOX_INT = Tuple[int, int, int, int]
 
 
+# noinspection PyUnusedLocal
 def default_screen_identity(img: np.ndarray):
     return True
 
@@ -570,8 +571,9 @@ def extract(s: BaseImage, params: Dict[PARAMS, Any]):
         tile_stride = int(params.get('tile_stride', 256))
         tissue_thresh = float(params.get('tissue_ratio', 0.5))
         # no added value from GPU acceleration (except for read_region) as the procedure is sequential
-        img_use_for_tiles = adapter.move_to_device(s.getImgThumb(s["image_work_size"]), ArrayDevice.CPU)
-        mask_use_for_tiles = adapter.move_to_device(s['img_mask_use'], ArrayDevice.CPU)
+        img_use_for_tiles, mask_use_for_tiles = adapter.curate_arrays_device(s.getImgThumb(s["image_work_size"]),
+                                                                             s['img_mask_use'],
+                                                                             device=Device.build(Device.DEVICE_CPU))
         image_handle = s.image_handle
         img_w, img_h = image_handle.dimensions
 
diff --git a/histoqc/__main__.py b/histoqc/__main__.py
index 430d33f..8462cc1 100644
--- a/histoqc/__main__.py
+++ b/histoqc/__main__.py
@@ -8,6 +8,7 @@
 import sys
 import time
 from functools import partial
+from typing import Tuple, Optional, List
 
 from histoqc._pipeline import BatchedResultFile
 from histoqc._pipeline import MultiProcessingLogManager
@@ -20,9 +21,55 @@
 from histoqc._worker import worker_setup
 from histoqc._worker import worker_success
 from histoqc._worker import worker_error
+from histoqc._worker import PARAM_SHARE, device_assign, KEY_ASSIGN
 from histoqc.config import read_config_template
 from histoqc.data import managed_pkg_data
 from histoqc.wsi_handles.constants import KEY_CUCIM
+from histoqc.array_adapter.adapter import cupy_installed
+from histoqc.import_wrapper.cupy_extra import cp
+
+
+def parse_config(args: argparse.Namespace) -> Tuple[configparser.ConfigParser, Optional[str]]:
+    config = configparser.ConfigParser()
+    msg = None
+    if not args.config:
+        msg = f"Configuration file not set (--config), using default"
+        config.read_string(read_config_template('default'))
+    elif os.path.exists(args.config):
+        config.read(args.config)  # Will read the config file
+    else:
+        msg = f"Configuration file {args.config} assuming to be a template...checking."
+        config.read_string(read_config_template(args.config))
+    return config, msg
+
+
+def _get_device_list(n_proc: int):
+    return list(range(n_proc)) if n_proc > 0 else [0]
+
+
+def parse_multiprocessing(args: argparse.Namespace,
+                          config: configparser.ConfigParser) -> Tuple[argparse.Namespace, List[int]]:
+    is_multiproc = args.nprocesses >= 0
+    is_cuda = KEY_CUCIM in config["BaseImage.BaseImage"].get("handles", "")
+
+    # if use cuda but without installation of dependencies - return
+    if not is_cuda or not cupy_installed():
+        return args, _get_device_list(args.nprocesses)
+    # guard
+    assert is_cuda and cp is not None, f"Enable CUDA but cupy is not installed"
+    # set spawn
+    if is_multiproc:
+        multiprocessing.set_start_method("spawn", force=True)
+    num_devices = cp.cuda.runtime.getDeviceCount()
+    # n_proc cannot exceed num of GPUs.
+    assert num_devices > 0, f"Fail to detect usable CUDA devices"
+    if args.nprocesses > num_devices:
+        logging.warning(f"{__name__}: CUDA enabled but number of processes is greater than number of devices:"
+                        f"{args.nprocesses} > {num_devices}. Cutoff the number of processes to {num_devices}")
+    args.nprocesses = min(args.nprocesses, num_devices)
+    # device list --> if
+    device_list = _get_device_list(args.nprocesses)
+    return args, device_list
 
 
 @managed_pkg_data
@@ -69,28 +116,17 @@ def main(argv=None):
     # --- multiprocessing and logging setup -----------------------------------
     # todo: move config parsing above the mpm initialization and set the start method accordingly
     # multiprocessing.set_start_method("spawn")
+
     setup_logging(capture_warnings=True, filter_warnings='ignore')
-    mpm = multiprocessing.Manager()
-    lm = MultiProcessingLogManager('histoqc', manager=mpm)
 
     # --- parse the pipeline configuration ------------------------------------
+    config, conf_warn_msg = parse_config(args)
+    args, device_list = parse_multiprocessing(args, config)
+    mpm = multiprocessing.Manager()
+    lm = MultiProcessingLogManager('histoqc', manager=mpm)
+    if conf_warn_msg:
+        lm.logger.warning(conf_warn_msg)
 
-    config = configparser.ConfigParser()
-    if not args.config:
-        lm.logger.warning(f"Configuration file not set (--config), using default")
-        config.read_string(read_config_template('default'))
-    elif os.path.exists(args.config):
-        config.read(args.config)  # Will read the config file
-    else:
-        lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.")
-        config.read_string(read_config_template(args.config))
-
-    # todo: for cuda --> must use spawn method if CUDA is enabled.
-    # todo: however a better memory management scheme should be tested.
-    # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime
-    # todo: at this moment we simply override the args.nprocesses
-    if config["BaseImage.BaseImage"].get("handles") == KEY_CUCIM:
-        args.nprocesses = 0
     # --- provide models, pen and templates as fallbacks from package data ----
 
     managed_pkg_data.inject_pkg_data_fallback(config)
@@ -165,21 +201,27 @@ def main(argv=None):
         'outdir': args.outdir,
         'log_manager': lm,
         'lock': mpm.Lock(),
-        'shared_dict': mpm.dict(),
+        PARAM_SHARE: mpm.dict(),
         'num_files': num_files,
         'force': args.force,
     }
+    # init the dict of device assignment
+    _shared_state[PARAM_SHARE][KEY_ASSIGN] = mpm.dict()
     failed = mpm.list()
     setup_plotting_backend(lm.logger)
 
     try:
-        if args.nprocesses > 1:
+        # todo: for cuda --> must use spawn method if CUDA is enabled.
+        # todo: however a better memory management scheme should be tested.
+        # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime
+        # todo: at this moment we simply override the args.nprocesses
+
+        if args.nprocesses > 0:
 
             with lm.logger_thread():
-                print(args.nprocesses)
                 with multiprocessing.Pool(processes=args.nprocesses,
                                           initializer=worker_setup,
-                                          initargs=(config,)) as pool:
+                                          initargs=(config, device_list, _shared_state)) as pool:
                     try:
                         for idx, file_name in enumerate(files):
                             _ = pool.apply_async(
@@ -197,6 +239,7 @@ def main(argv=None):
         else:
             for idx, file_name in enumerate(files):
                 try:
+                    device_assign(device_list, _shared_state[PARAM_SHARE])
                     _success = worker(idx, file_name, **_shared_state)
                 except Exception as exc:
                     worker_error(exc, failed)
diff --git a/histoqc/_worker.py b/histoqc/_worker.py
index ecf4e6a..163a998 100644
--- a/histoqc/_worker.py
+++ b/histoqc/_worker.py
@@ -1,18 +1,41 @@
 """histoqc worker functions"""
+import multiprocessing
 import os
 import shutil
 import traceback
 from histoqc.BaseImage import BaseImage
 from histoqc._pipeline import load_pipeline
 from histoqc._pipeline import setup_plotting_backend
+from typing import Dict, List, Optional
+from multiprocessing import managers
+
+KEY_ASSIGN: str = 'device_assign'
+PARAM_SHARE: str = 'shared_dict'
 
 
 # --- worker functions --------------------------------------------------------
+def id_assign_helper(device_id_list: List[int], assign_dict: managers.DictProxy):
+    pid = os.getpid()
+    for device_id in device_id_list:
+        if device_id not in assign_dict.values():
+            assign_dict[pid] = device_id
+            return
+
+
+def device_assign(device_id_list: List[int], shared_dict: managers.DictProxy):
+    """Initializer to configure each worker with a specific GPU."""
+    shared_dict[KEY_ASSIGN] = shared_dict.get(KEY_ASSIGN, None)
+    assert shared_dict[KEY_ASSIGN] is not None
+    assert KEY_ASSIGN in shared_dict
+    id_assign_helper(device_id_list, shared_dict[KEY_ASSIGN])
+
 
-def worker_setup(c):
+def worker_setup(c, device_id_list: List[int], state: Dict):
     """needed for multiprocessing worker setup"""
     setup_plotting_backend()
+    shared_dict = state[PARAM_SHARE]
     load_pipeline(config=c)
+    device_assign(device_id_list, shared_dict)
 
 
 def worker(idx, file_name, *,
@@ -35,9 +58,15 @@ def worker(idx, file_name, *,
     os.makedirs(fname_outdir)
 
     log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")
+    device_id = shared_dict[KEY_ASSIGN].get(os.getpid(), None)
+    if device_id is None:
+        log_manager.logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id."
+                                   f"Default: use 0 for CUDA devices.")
+    s: Optional[BaseImage] = None
 
     try:
-        s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")))
+        s: BaseImage = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")),
+                                 device_id=device_id)
         for process, process_params in process_queue:
             process_params["lock"] = lock
             process_params["shared_dict"] = shared_dict
@@ -45,7 +74,10 @@ def worker(idx, file_name, *,
             s["completed"].append(process.__name__)
     except Exception as exc:
         # reproduce histoqc error string
-        _oneline_doc_str = exc.__doc__.replace('\n', '')
+        if s is not None:
+            s.image_handle.release()
+        print(f"DBG: {__name__}: {exc}")
+        _oneline_doc_str = exc.__doc__.replace('\n', '') if exc.__doc__ is not None else ''
         err_str = f"{exc.__class__} {_oneline_doc_str} {exc}"
         trace_string = traceback.format_exc()
         log_manager.logger.error(
diff --git a/histoqc/array_adapter/__init__.py b/histoqc/array_adapter/__init__.py
index c43f502..4ed63c5 100644
--- a/histoqc/array_adapter/__init__.py
+++ b/histoqc/array_adapter/__init__.py
@@ -1,2 +1,2 @@
 from .func_mapping import FUNC_MAP
-from .adapter import ArrayAdapter, ArrayDevice
+from .adapter import ArrayAdapter, ArrayDeviceType, Device
diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py
index 17c46e3..52b3a15 100644
--- a/histoqc/array_adapter/adapter.py
+++ b/histoqc/array_adapter/adapter.py
@@ -3,7 +3,7 @@
 from histoqc.array_adapter.func_mapping import FUNC_MAP
 import numpy as np
 from numbers import Number
-from typing import Callable, Mapping, Tuple, Optional, Any, Iterable
+from typing import Callable, Mapping, Tuple, Optional, Any, Iterable, Dict
 from typing_extensions import Self, TypeGuard
 from histoqc.import_wrapper.cupy_extra import cupy as cp
 from enum import Enum
@@ -11,6 +11,7 @@
 import functools
 from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv
 import skimage
+import re
 
 
 def cupy_installed() -> bool:
@@ -21,7 +22,7 @@ def cupy_installed() -> bool:
         return False
 
 
-class ArrayDevice(Enum):
+class ArrayDeviceType(Enum):
     CPU: str = 'cpu'
     CUDA: str = 'cuda'
 
@@ -36,7 +37,7 @@ def from_str(cls, str_val: str):
         return cls(str_val)
 
     @classmethod
-    def build(cls, value: str | Self):
+    def build(cls, value: str | Self) -> Self:
         if isinstance(value, cls):
             return value
         if isinstance(value, str):
@@ -44,14 +45,94 @@ def build(cls, value: str | Self):
         raise TypeError(f'Unexpected type {type(value)}')
 
 
+class Device:
+    __device_type: ArrayDeviceType
+    __device_id: Optional[int]
+    __instances: Dict[Tuple[ArrayDeviceType, Optional[int]], Self] = dict()
+    _is_initialized: bool
+    DEFAULT_ID: int = 0
+
+    DEVICE_CPU: str = 'cpu'
+    DEVICE_CUDA: str = 'cuda'
+
+    def is_cpu(self) -> bool:
+        return self.__device_type is ArrayDeviceType.CPU
+
+    def is_cuda(self) -> bool:
+        return self.__device_type is ArrayDeviceType.CUDA
+
+    @property
+    def device_type(self) -> ArrayDeviceType:
+        return self.__device_type
+
+    @property
+    def device_id(self) -> Optional[int]:
+        return self.__device_id
+
+    def __init__(self, device_type: ArrayDeviceType, device_id: Optional[int] = None) -> None:
+        if not hasattr(self, "_is_initialized") or not self._is_initialized:
+            self.__device_type = device_type
+            if self.is_cuda() and device_id is None:
+                device_id = Device.DEFAULT_ID
+            self.__device_id = device_id
+        self._is_initialized = True
+
+    def __repr__(self) -> str:
+        dev_id = f":{self.device_id}" if self.device_type is ArrayDeviceType.CUDA else ""
+        return f"{self.device_type.value}{dev_id}"
+
+    def __reduce__(self):
+        # Return a tuple representing the pickling state
+        return self.__class__, (self.__device_type, self.__device_id)
+
+    def __new__(cls, device_type: ArrayDeviceType, device_id: Optional[int] = None):
+        device_id = device_id if device_type is ArrayDeviceType.CUDA else None
+        if device_type is ArrayDeviceType.CUDA and device_id is None:
+            device_id = cls.DEFAULT_ID
+
+        key = (device_type, device_id)
+        if key not in cls.__instances:
+            inst = super().__new__(cls)
+            cls.__instances[key] = inst
+        return cls.__instances[key]
+
+    @classmethod
+    def parse_input(cls, device: str | int) -> Tuple[ArrayDeviceType, Optional[int]]:
+        assert device is not None, f"device must not be None"
+        if isinstance(device, int):
+            return ArrayDeviceType.CUDA, device
+
+        assert isinstance(device, str), f"device must either be int (GPU:device) or str (cpu|cuda)[:device]"
+        regex = r'^(cpu|cuda)(:(\d+))?$'
+        match = re.match(regex, device)
+
+        assert match is not None and len(match.groups()) == 3, f"Unexpected input format: {device}"
+        groups = match.groups()
+
+        if groups[0] == cls.DEVICE_CPU:
+            # Handle the "cpu" case
+            return ArrayDeviceType.CPU, None
+        elif groups[0] == cls.DEVICE_CUDA and groups[2] is None:
+            return ArrayDeviceType.CUDA, cls.DEFAULT_ID
+        elif groups[0] == cls.DEVICE_CUDA and groups[2] is not None:
+            device_id = int(groups[2])
+            return ArrayDeviceType.CUDA, device_id
+        raise ValueError(f"Unexpected input format: {device}")
+
+    @classmethod
+    def build(cls, device: str | int):
+        device_type, device_id = cls.parse_input(device)
+        return cls(device_type, device_id)
+
+
 class ArrayAdapter(Callable):
 
     func_map: Mapping[Callable, Callable]
-    input_device: Optional[str | ArrayDevice]
-    output_device: Optional[str | ArrayDevice]
-    contingent_type: ArrayDevice
+    input_device: Optional[str | Device]
+    output_device: Optional[str | Device]
+    contingent_device: Device
 
-    TYPE_CONTINGENT_DEFAULT: ArrayDevice = ArrayDevice.CUDA
+    id: int
 
     @staticmethod
     def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]:
@@ -69,63 +150,81 @@ def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP:
         return arr.get()
 
     @staticmethod
-    def to_cupy(arr: TYPE_ARRAY | Number, copy: bool = False) -> TYPE_CP:
+    def to_cupy(arr: TYPE_ARRAY | Number, device: Device, copy: bool = False) -> TYPE_CP:
         assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \
             f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}"
-        return cp.array(arr, copy=copy)
+        assert device is not None and isinstance(device, Device) and device.is_cuda(), f"{device} is not CUDA device"
+        with cp.cuda.Device(device.device_id):
+            return cp.array(arr, copy=copy)
 
     @staticmethod
-    def array_device_type(arr: TYPE_ARRAY) -> ArrayDevice:
-        on_cpu = isinstance(arr, np.ndarray)
-        return ArrayDevice.from_bool(on_cpu)
+    def array_device_type(arr: TYPE_ARRAY) -> Device:
+        on_cpu = ArrayAdapter.is_numpy(arr)
+        if on_cpu:
+            return Device.build(Device.DEVICE_CPU)
+        assert cupy_installed() and ArrayAdapter.is_cupy(arr)
+        return Device.build(f"{Device.DEVICE_CUDA}:{arr.device.id}")
 
     @staticmethod
     def is_array(arr: TYPE_ARRAY) -> bool:
-        return isinstance(arr, np.ndarray) or (cupy_installed() and isinstance(arr, cp.ndarray))
+        return ArrayAdapter.is_numpy(arr) or ArrayAdapter.is_cupy(arr)
 
     @classmethod
-    def new_array(cls, arr: Any, array_device: ArrayDevice) -> TYPE_ARRAY:
+    def new_array(cls, arr: Any, array_device: Device) -> TYPE_ARRAY:
         if cls.is_array(arr):
-            return cls.move_to_device(arr, array_device, copy=False)
+            return cls._move_to_device(arr, array_device, copy=True)
         if isinstance(arr, Iterable):
-            arr = cls.curate_array_device(*arr, device=array_device, copy=False)
-        if array_device is array_device.CUDA:
+            arr = cls.curate_arrays_device(*arr, device=array_device, copy=True)
+        if array_device.is_cuda():
             assert cupy_installed()
-            return cp.asarray(arr)
-        elif array_device is array_device.CPU:
+            with cp.cuda.Device(array_device.device_id):
+                return cp.asarray(arr)
+        elif array_device.DEVICE_CPU:
             return np.asarray(arr)
-        raise TypeError(f'Unexpected device {ArrayDevice}')
+        raise TypeError(f'Unexpected device {ArrayDeviceType}')
 
     def asarray(self, arr: Any) -> TYPE_ARRAY:
         return self.__class__.new_array(arr, self.output_device)
 
     @classmethod
-    def move_to_device(cls,
-                       arr: Optional[TYPE_ARRAY],
-                       device: Optional[ArrayDevice], copy: bool = False) -> TYPE_ARRAY:
+    def _move_to_device(cls,
+                        arr: Optional[TYPE_ARRAY],
+                        device: Optional[Device], copy: bool = False) -> TYPE_ARRAY:
         # structural match > py3.10
         if device is None or not cls.is_array(arr):
             return arr
-        if device is ArrayDevice.CPU:
+        assert device is not None
+        if device.is_cpu():
             return ArrayAdapter.to_numpy(arr, copy=copy)
-        elif device is ArrayDevice.CUDA:
-            return ArrayAdapter.to_cupy(arr, copy=copy)
+        elif device.is_cuda():
+            return ArrayAdapter.to_cupy(arr, device, copy=copy)
         raise ValueError(f"Unsupported device: {device}")
 
     def sync(self, arr: Optional[TYPE_ARRAY | Number], copy: bool = False) -> TYPE_ARRAY:
         if not self.__class__.is_array(arr):
             return arr
-        return self.__class__.move_to_device(arr, device=self.output_device, copy=copy)
+        return self.__class__._move_to_device(arr, device=self.output_device, copy=copy)
 
     @classmethod
-    def curate_device_helper(cls, output: TYPE_ARRAY, device: Optional[ArrayDevice], copy: bool):
-        if output is not None and cls.is_array(output):
-            output = cls.move_to_device(output, device, copy=copy)
-        return output
+    def curate_device_helper(cls, arr: TYPE_ARRAY, device: Optional[Device], copy: bool):
+        if arr is not None and cls.is_array(arr):
+            arr = cls._move_to_device(arr, device, copy=copy)
+        return arr
 
     @classmethod
-    def curate_array_device(cls, *arrays: TYPE_ARRAY,
-                            device: Optional[ArrayDevice], copy: bool) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+    def curate_arrays_device(cls, *arrays: TYPE_ARRAY,
+                             device: Optional[Device],
+                             copy: bool = False) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        """Curate the device type of one or more arrays.
+
+        Args:
+            *arrays:
+            device:
+            copy:
+
+        Returns:
+
+        """
         # already an array - no need to recursively unpack
         if cls.is_array(arrays):
             return cls.curate_device_helper(arrays, device, copy)
@@ -142,74 +241,99 @@ def curate_array_device(cls, *arrays: TYPE_ARRAY,
 
     @staticmethod
     def get_api(cpu_func: Callable,
-                func_map: Mapping[Callable, Callable], device_type: ArrayDevice) -> Tuple[Callable, ArrayDevice]:
-        if device_type == ArrayDevice.CPU:
-            return cpu_func, ArrayDevice.CPU
+                func_map: Mapping[Callable, Callable],
+                device: Optional[Device]) -> Tuple[Callable, Device]:
+        if device is None:
+            logging.warning(f"Device unspecified in both input data and input device. Try: gpu:0")
+            device = Device.build(Device.DEVICE_CUDA)
+        if device.is_cpu():
+            return cpu_func, device
+        assert device.is_cuda()
         mapped = func_map.get(cpu_func, None)
         if mapped is not None:
-            return mapped, ArrayDevice.CUDA
+            return mapped, device
         # if not implemented
         func_name = getattr(cpu_func, '__qualname__', cpu_func.__name__)
         logging.info(f"{__name__}: {func_name} does not have a GPU implementation. Revert to CPU")
-        return cpu_func, ArrayDevice.CPU
+        return cpu_func, Device.build(Device.DEVICE_CPU)
+
+    @classmethod
+    def call_helper(cls, func_in: Optional[TYPE_ARRAY], func_device: Device, func: Callable, *curated_args,
+                    **curated_kwargs):
+        if cls.is_cupy(func_in) or (func_in is None and func_device.is_cuda()):
+            assert func_in is None or func_in.device.id == func_device.device_id, \
+                f"{func_device} mismatch {func_in is None or func_in.device}"
+            with cp.cuda.Device(func_device.device_id):
+                return func(func_in, *curated_args, **curated_kwargs)
+        else:
+            return func(func_in, *curated_args, **curated_kwargs)
 
     @classmethod
     def unified_call(cls,
                      cpu_func: Callable,
                      func_map: Mapping[Callable, Callable],
-                     input_device: Optional[str | ArrayDevice],
-                     output_device: Optional[str | ArrayDevice],
+                     input_device: Optional[Device],
+                     output_device: Optional[Device],
                      data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
         # use input_device to override the current device, if not None
-        data = cls.curate_array_device(data, device=input_device, copy=False)
-        input_type = cls.array_device_type(data)
-        # attempt to fetch the op, revert to CPU if GPU impl is not available
+        data = cls.curate_arrays_device(data, device=input_device, copy=False)
+        # if data is None --> use input device.
+        # if input_device is None, by default will invoke GPU interface
+        input_type = cls.array_device_type(data) if data is not None else input_device
+        # attempt to fetch the op, revert to CPU if GPU impl is not available (func=GPU impl, func_device=cuda
         func, func_device = cls.get_api(cpu_func, func_map, input_type)
-        func_in = cls.curate_array_device(data, device=func_device, copy=False)
+        func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True)
 
-        curated_args = cls.curate_array_device(*args, device=func_device, copy=False)
+        curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True)
         curated_kwargs = dict()
-
         for k, v in kwargs.items():
-            curated_kwargs[k] = cls.curate_array_device(v, device=func_device, copy=False)
+            curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=True)
 
-        output = func(func_in, *curated_args, **curated_kwargs)
+        output = cls.call_helper(func_in, func_device, func, *curated_args, **curated_kwargs)
         # only move the output around if the output is an array
         if isinstance(output, tuple):
-            return cls.curate_array_device(*output, device=output_device, copy=False)
-        return cls.curate_array_device(output, device=output_device, copy=False)
+            return cls.curate_arrays_device(*output, device=output_device, copy=True)
+        return cls.curate_arrays_device(output, device=output_device, copy=True)
 
     @classmethod
-    def _validate_device(cls, device_type: Optional[ArrayDevice]) -> Optional[ArrayDevice]:
-        if device_type is None:
+    def _validate_device(cls, device: Optional[Device | str | int]) -> Optional[Device]:
+        if device is None:
             return None
-        if device_type is ArrayDevice.CPU:
-            return device_type
-        assert device_type is ArrayDevice.CUDA, f"Unsupported device_type: {device_type}"
+        if isinstance(device, (str, int)):
+            device = Device.build(device)
+
+        assert isinstance(device, Device), f"{type(device)}"
+
+        if device.is_cpu():
+            return device
+
+        assert device.is_cuda(), f"Unsupported device_type: {device}"
         if not cupy_installed():
             logging.info(f"Cupy is not installed. Revert to CPU")
-            return ArrayDevice.CPU
-        return device_type
+            return Device.build(Device.DEVICE_CPU)
+        return device
 
     def __init__(self,
-                 input_device: Optional[str | ArrayDevice],
-                 output_device: Optional[str | ArrayDevice],
+                 input_device: Optional[str | int | Device],
+                 output_device: Optional[str | int | Device],
                  func_map: Mapping[Callable, Callable],
-                 contingent_type: ArrayDevice,
+                 contingent_device: Device,
                  ):
         self.input_device = self.__class__._validate_device(input_device)
         self.output_device = self.__class__._validate_device(output_device)
         self.func_map = func_map
-        self.contingent_type = contingent_type
+        self.contingent_device = contingent_device
 
     @classmethod
     def build(cls,
-              input_device: Optional[str | ArrayDevice],
-              output_device: Optional[str | ArrayDevice],
+              input_device: Optional[str | int | Device],
+              output_device: Optional[str | int | Device],
               func_map: Mapping[Callable, Callable] = FUNC_MAP,
-              contingent_type: ArrayDevice = TYPE_CONTINGENT_DEFAULT):
+              contingent_device: Device = None) -> Self:
+        if contingent_device is None:
+            contingent_device = output_device
         return cls(input_device=input_device, output_device=output_device, func_map=func_map,
-                   contingent_type=contingent_type)
+                   contingent_device=contingent_device)
 
     def apply(self, /, cpu_func: Callable, data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
         return self.unified_call(cpu_func, self.func_map, self.input_device, self.output_device,
@@ -227,90 +351,100 @@ def __sync_device_output_helper(*arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_
 
     @classmethod
     def device_sync_all_helper(cls, *arrays: TYPE_ARRAY,
-                               array_device: Optional[ArrayDevice],
-                               contingent_type: Optional[ArrayDevice]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+                               array_device: Optional[Device],
+                               contingent_device: Optional[Device]) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
         assert isinstance(arrays, tuple), f"input check. {type(arrays)} is not a tuple"
         if array_device is not None:
-            return cls.__sync_device_output_helper(tuple(cls.move_to_device(arr, array_device) for arr in arrays))
+            return cls.__sync_device_output_helper(tuple(cls._move_to_device(arr, array_device) for arr in arrays))
         assert array_device is None
-        has_contingent_device = any(cls.array_device_type(arr) is contingent_type for arr in arrays)
+        has_contingent_device = any(cls.array_device_type(arr) is contingent_device for arr in arrays)
         if has_contingent_device:
-            assert contingent_type is not None
+            assert contingent_device is not None
             return cls.__sync_device_output_helper(cls.device_sync_all_helper(*arrays,
-                                                                              array_device=contingent_type,
-                                                                              contingent_type=None))
+                                                                              array_device=contingent_device,
+                                                                              contingent_device=None))
         return cls.__sync_device_output_helper(arrays)
 
     def device_sync_all(self, *arrays: TYPE_ARRAY) -> TYPE_ARRAY | Tuple[TYPE_ARRAY, ...]:
+        """Synchronize the device of all arrays to be
+
+        Args:
+            *arrays:
+
+        Returns:
+
+        """
         return self.__class__.device_sync_all_helper(*arrays, array_device=self.output_device,
-                                                     contingent_type=self.contingent_type)
+                                                     contingent_device=self.contingent_device)
 
     @classmethod
     def binary_operation(cls, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY,
-                         input_device: Optional[ArrayDevice],
-                         output_device: Optional[ArrayDevice],
-                         contingent_type: Optional[ArrayDevice],
+                         input_device: Optional[Device],
+                         output_device: Optional[Device],
+                         contingent_device: Optional[Device],
                          op: Callable) -> TYPE_ARRAY:
         arr1, arr2 = cls.device_sync_all_helper(arr1, arr2, array_device=input_device,
-                                                contingent_type=contingent_type)
+                                                contingent_device=contingent_device)
         result: TYPE_ARRAY = op(arr1, arr2)
-        return cls.curate_array_device(result, device=output_device, copy=False)
+        return cls.curate_arrays_device(result, device=output_device, copy=True)
 
     def and_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=and_)
 
     def or_(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=or_)
 
     def add(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=add)
 
     def sub(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=sub)
 
     def mul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=mul)
 
     def matmul(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=matmul)
 
     def truediv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=truediv)
 
     def floordiv(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=floordiv)
 
     def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
         return self.__class__.binary_operation(arr1, arr2, input_device=self.input_device,
                                                output_device=self.output_device,
-                                               contingent_type=self.contingent_type,
+                                               contingent_device=self.contingent_device,
                                                op=xor)
 
     @classmethod
-    def imsave(cls, filename: str, arr: TYPE_ARRAY):
-        arr = cls.curate_array_device(arr, device=ArrayDevice.CPU, copy=False)
-        return skimage.io.imsave(filename, arr)
+    def imsave(cls, filename: str, arr: TYPE_ARRAY, **kwargs):
+        logging.debug(f"{__name__}: SHAPE DBG {arr.shape}")
+        arr = cls.curate_arrays_device(arr, device=Device.build(Device.DEVICE_CPU), copy=True)
+        logging.debug(f"{__name__}: TYPE DBG {type(arr)}")
+        return skimage.io.imsave(filename, arr, **kwargs)
diff --git a/histoqc/array_adapter/func_mapping.py b/histoqc/array_adapter/func_mapping.py
index aa55b46..8ecc0ca 100644
--- a/histoqc/array_adapter/func_mapping.py
+++ b/histoqc/array_adapter/func_mapping.py
@@ -1,5 +1,6 @@
 import scipy.ndimage
 import skimage
+import sklearn
 from typing import Callable, Mapping
 from scipy import ndimage as sci_ndi
 import numpy as np
@@ -9,7 +10,13 @@
     from cupyx.scipy import signal as cu_signal
     from cucim import skimage as cu_skimage
     from cupyx.scipy import ndimage as cu_ndi
+    from sklearn import naive_bayes as sk_naive_bayes
+    from cuml import naive_bayes as cuml_naive_bayes
+    import cuml
+    from cuml import ensemble as cuml_ensemble
+    from sklearn import ensemble as sk_ensemble
 
+    # noinspection PyUnresolvedReferences
     FUNC_MAP: Mapping[Callable, Callable] = {
         skimage.color.convert_colorspace: cu_skimage.color.convert_colorspace,
         skimage.color.rgb2gray: cu_skimage.color.rgb2gray,
@@ -45,7 +52,9 @@
         skimage.filters.rank.minimum: None,  # cu_skimage.morphology.erosion,
         sci_signal.convolve2d: cu_signal.convolve2d,
         sci_ndi.generate_binary_structure: cu_ndi.generate_binary_structure,
-        np.digitize: cp.digitize
+        np.digitize: cp.digitize,
+        # sk_naive_bayes.GaussianNB: cuml_naive_bayes.GaussianNB,
+        # sk_ensemble.RandomForestClassifier: cuml_ensemble.RandomForestClassifier,
     }
 except ImportError:
     FUNC_MAP = dict()
diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py
index c67d3e1..33d6586 100644
--- a/histoqc/import_wrapper/cupy_extra.py
+++ b/histoqc/import_wrapper/cupy_extra.py
@@ -1,5 +1,7 @@
 try:
     import cupy
+    # cupy.cuda.set_allocator(None)
+    cupy.cuda.set_allocator(cupy.cuda.MemoryPool().malloc)
 except ImportError:
     cupy = None
 finally:
diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py
index 9a65a71..56bf82e 100644
--- a/histoqc/wsi_handles/base.py
+++ b/histoqc/wsi_handles/base.py
@@ -3,11 +3,11 @@
 
 from histoqc.import_wrapper import dynamic_import
 import logging
-from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic
+from typing import Sequence, TypeVar, Tuple, List, Union, Dict, Callable, Mapping, Generic, Optional, cast
 import numpy as np
 from PIL.Image import Image as PILImage
 from typing_extensions import final
-from histoqc.array_adapter import ArrayDevice, ArrayAdapter
+from histoqc.array_adapter import ArrayDeviceType, ArrayAdapter, Device
 import os
 
 from histoqc.wsi_handles.constants import WSI_HANDLES, HANDLE_DELIMITER
@@ -22,6 +22,11 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]):
     handle: T
     fname: str
     _adapter: ArrayAdapter
+    _device: Device
+
+    @property
+    def device(self) -> Device:
+        return self._device
 
     @staticmethod
     def curate_shorter_edge(width, height, limit, aspect_ratio):
@@ -188,7 +193,7 @@ def array_shape(arr: ARRAY) -> Tuple[int, int]:
 
     @staticmethod
     @abstractmethod
-    def backend_to_array(region: Union[Backend, ARRAY]) -> ARRAY:
+    def backend_to_array(region: Union[Backend, ARRAY], device: Optional[Device]) -> ARRAY:
         ...
     
     def read_region(self, location, level, size, **kwargs) -> PILImage:
@@ -205,7 +210,7 @@ def read_macro(self) -> Backend:
 
     @classmethod
     @abstractmethod
-    def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int]) -> ARRAY:
+    def region_resize_arr(cls, data: ARRAY, new_size_wh: Tuple[int, int], device: Optional[Device]) -> ARRAY:
         ...
 
     @abstractmethod
@@ -216,12 +221,12 @@ def curated_best_level_for_downsample(self, downsample_factor: float) -> Tuple[i
         relative_down_factors_idx = [np.isclose(i / downsample_factor, 1, atol=.01) for i in self.level_downsamples]
         level = np.where(relative_down_factors_idx)[0]
         if level.size:
-            return level[0], True
+            return cast(int, level[0]), True
         return self.get_best_level_for_downsample(downsample_factor), False
 
     @staticmethod
     @abstractmethod
-    def grid_stack(grid: List[List[ARRAY]]):
+    def grid_stack(grid: List[List[ARRAY]], device: Optional[device]) -> ARRAY:
         ...
 
     def resize_tile_downward(self, target_downsampling_factor, level,
@@ -260,15 +265,16 @@ def resize_tile_downward(self, target_downsampling_factor, level,
                                                      **read_region_kwargs)
                 if np.shape(closest_region)[-1] == 4:
                     closest_region = self.backend_rgba2rgb(closest_region)
-                closest_region_arr = self.__class__.backend_to_array(closest_region)
+                closest_region_arr = self.__class__.backend_to_array(closest_region, self.device)
                 target_region = self.__class__.region_resize_arr(closest_region_arr,
-                                                                 (win_down_width, win_down_height))
+                                                                 (win_down_width, win_down_height),
+                                                                 device=self.device)
                 row_piece.append(target_region)
             # row_piece = np.concatenate(row_piece, axis=0)
             grid.append(row_piece)
         # grid = np.concatenate(output, axis=1)
         #
-        return self.__class__.grid_stack(grid)
+        return self.__class__.grid_stack(grid, device=self.device)
 
     def best_thumb(self, x: int, y: int, dims: Tuple[int, int],
                    target_sampling_factor: float, **read_region_kwargs) -> ARRAY:
@@ -276,15 +282,15 @@ def best_thumb(self, x: int, y: int, dims: Tuple[int, int],
         # get thumb from og
         if not self.has_bounding_box:
             max_dim = dims[0] if dims[0] > dims[1] else dims[1]
-            return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim)))
+            return self.__class__.backend_to_array(self.get_thumbnail((max_dim, max_dim)), device=self.device)
 
         (level, is_exact_level) = self.curated_best_level_for_downsample(target_sampling_factor)
 
         # check if to get the existing level
         if is_exact_level:
             backend: Backend = self.read_region((x, y), level, dims)
-            return self.__class__.backend_to_array(self.backend_rgba2rgb(backend)) \
-                if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend)
+            return self.__class__.backend_to_array(self.backend_rgba2rgb(backend), device=self.device) \
+                if np.shape(backend)[-1] == 4 else self.__class__.backend_to_array(backend, device=self.device)
         # scale down the thumb img from the next high level
         else:
             return self.resize_tile_downward(target_sampling_factor, level, win_size=2048, **read_region_kwargs)
@@ -311,13 +317,14 @@ def parse_wsi_handles(handle_list: str | List[str], delimiter: str,
 
     @classmethod
     def __create_handle(cls, fname: str,
-                        handle_class_list: List[Callable[[str], "WSIImageHandle"]]) -> "WSIImageHandle":
+                        handle_class_list: List[Callable[[str, Optional[int]], "WSIImageHandle"]],
+                        device_id: Optional[int]) -> "WSIImageHandle":
         image_handle = None
         assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file"
         for handle_class in handle_class_list:
             # noinspection PyBroadException
             try:
-                image_handle = handle_class(fname)
+                image_handle = handle_class(fname, device_id)
                 break
             except Exception:
                 # current wsi handle class doesn't support this file
@@ -333,16 +340,18 @@ def __create_handle(cls, fname: str,
 
     @classmethod
     @final
-    def build_handle(cls, fname: str, handles: str) -> "WSIImageHandle":
+    def build_handle(cls, fname: str, handles: str, device_id: Optional[int]) -> "WSIImageHandle":
         # get handles list
         module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES)
         handle_class_list = dynamic_import(module_list, attr_list, return_first=False)
-        image_handle = cls.__create_handle(fname, handle_class_list)
+        image_handle = cls.__create_handle(fname, handle_class_list, device_id)
         return image_handle
 
-    def __init__(self, fname: str):
+    def __init__(self, fname: str, device_id: Optional[int]):
         self.fname = fname
-        self._adapter = ArrayAdapter.build(input_device=self.device, output_device=self.device)
+        self._device = Device(self.device_type, device_id)
+        self._adapter = ArrayAdapter.build(input_device=self._device, output_device=self._device,
+                                           contingent_device=self._device)
 
     @abstractmethod
     def close_handle(self):
@@ -357,9 +366,13 @@ def is_closed(self):
 
     @property
     @abstractmethod
-    def device(self) -> ArrayDevice:
+    def device_type(self) -> ArrayDeviceType:
         raise NotImplementedError
 
     @property
     def adapter(self) -> ArrayAdapter:
         return self._adapter
+
+    @abstractmethod
+    def release(self):
+        ...
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
index df23ee3..d9c6a70 100644
--- a/histoqc/wsi_handles/cuimage_handle.py
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -1,43 +1,61 @@
 from __future__ import annotations
+
+import skimage.util
 from PIL.Image import Image as PILImage
 from cucim.clara import CuImage
 from .base import WSIImageHandle
+import traceback
 from PIL import Image
 from ..import_wrapper.openslide import openslide
 import cupy as cp
-from typing import List, Union, Tuple, Mapping
+from typing import List, Union, Tuple, Mapping, Optional
 from typing import cast
 from lazy_property import LazyProperty
 import numpy as np
 from cucim import skimage as c_skimage
-from histoqc.array_adapter import ArrayDevice
-import gc
+from histoqc.array_adapter import ArrayDeviceType, Device
+from types import MappingProxyType
+import logging
+
+
+DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA)
 
 
 class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
 
-    handle: CuImage
+    handle: Optional[CuImage]
     fname: str
+    _associated_images: Optional[Mapping]
 
     # TODO: standalone parser of vendor information
-    dummy_handle: openslide.OpenSlide
+    dummy_handle: Optional[openslide.OpenSlide]
 
     def backend_rgba2rgb(self, img: CuImage) -> CuImage:
         # todo: it appears that CuImage does not take care of the alpha channel at all.
         return img
 
     @classmethod
-    def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int]) -> cp.ndarray:
-        w, h, *_ = new_size_wh
-        arr = cp.array(data)
-        return c_skimage.transform.resize(arr, output_shape=(h, w))
+    def validate_device(cls, device: Optional[Device]):
+        device = DEFAULT_DEVICE if device is None else device
+        assert isinstance(device, Device)
+        return device
 
-    def __init__(self, fname: str):
-        super().__init__(fname)
+    @classmethod
+    def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device: Optional[Device]) -> cp.ndarray:
+        device = cls.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            w, h, *_ = new_size_wh
+            arr = cp.array(data)
+            return c_skimage.transform.resize(arr, output_shape=(h, w))
+
+    def __init__(self, fname: str, device_id: Optional[int]):
+        super().__init__(fname, device_id)
+        self._associated_images = None
         self.handle = CuImage(fname)
         # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a
         # todo - native unified metadata interface for different vendors.
         self.dummy_handle = openslide.OpenSlide(fname)
+        logging.debug(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}")
 
     @LazyProperty
     def background_color(self):
@@ -107,18 +125,23 @@ def get_thumbnail(self, new_dim):
 
         """
         # from openslide
-        downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim)))
-        level = self.get_best_level_for_downsample(downsample)
-        thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level]))
-        # resize
-        thumb_cp = cp.array(thumb, copy=False)
-        target_w, target_h = (x // int(downsample) for x in self.dimensions)
-        aspect_ratio = self.dimensions[0] / self.dimensions[1]
-
-        target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
-        resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w))
-
-        return c_skimage.util.img_as_ubyte(resized)
+        with cp.cuda.Device(self.device.device_id):
+            downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim)))
+            level = self.get_best_level_for_downsample(downsample)
+            target_w, target_h = (x // int(downsample) for x in self.dimensions)
+            aspect_ratio = self.dimensions[0] / self.dimensions[1]
+            # resize
+            thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level]))
+            try:
+                thumb_cp = cp.array(thumb, copy=True)
+                target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
+                resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), clip=True)
+                return c_skimage.util.img_as_ubyte(resized)
+            except Exception:
+                logging.warning(f"{__name__} - {self.fname}: OOM on {self.device.device_id}. {traceback.format_exc()}")
+                thumb_np = np.array(thumb, copy=True)
+                thumb_np = skimage.util.img_as_ubyte(thumb_np)
+                return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB")
 
     def get_best_level_for_downsample(self, down_factor: float) -> int:
         """Return the largest level that's smaller than the target downsample factor, consistent with openslide.
@@ -134,14 +157,19 @@ def get_best_level_for_downsample(self, down_factor: float) -> int:
         down_indices = np.where(level_downsamples_arr <= down_factor)[0]
         down_values = level_downsamples_arr[down_indices]
         # find the indices of the down_indices that points to the best downsample factor value
-        return down_indices[down_values.argmax()]
+        return cast(int, down_indices[down_values.argmax()])
 
-    def region_backend(self, location, level, size, **kwargs):
-        return self.handle.read_region(location=location, level=level, size=size, **kwargs)
+    def region_backend(self, location, level, size, **kwargs) -> CuImage:
+        with cp.cuda.Device(self.device.device_id):
+            return self.handle.read_region(location=location, level=level, size=size, **kwargs)
 
     @staticmethod
-    def backend_to_array(region: Union[CuImage, cp.ndarray]) -> cp.ndarray:
-        return cp.array(region, copy=False)
+    def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray:
+        device = CuImageHandle.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            result = cp.array(region, copy=False)
+            logging.debug(f"{__name__} - {device.device_id} == {result.device}")
+            return result
 
     @staticmethod
     def array_to_numpy(arr: cp.ndarray) -> np.ndarray:
@@ -149,7 +177,7 @@ def array_to_numpy(arr: cp.ndarray) -> np.ndarray:
 
     @classmethod
     def backend_to_pil(cls, region: CuImage) -> PILImage:
-        return Image.fromarray(cls.backend_to_array(region).get())
+        return Image.fromarray(cls.backend_to_array(region, None).get())
 
     def read_label(self) -> CuImage:
         return self.handle.associated_image("label")
@@ -157,14 +185,28 @@ def read_label(self) -> CuImage:
     def read_macro(self) -> CuImage:
         return self.handle.associated_image("macro")
 
-    @LazyProperty
+    @classmethod
+    def new_associated_images(cls, handle: CuImage) -> Mapping:
+        if handle is None or not hasattr(handle, "associated_images"):
+            return MappingProxyType(dict())
+        keys = handle.associated_images
+        return MappingProxyType({k: handle.associated_image(k) for k in keys})
+
+    @property
     def associated_images(self) -> Mapping:
-        keys = self.handle.associated_images
-        return {k: self.handle.associated_image(k) for k in keys}
+        handle = getattr(self, "handle", None)
+        if not hasattr(self, "_associated_images") or self._associated_images is None:
+            self._associated_images = self.__class__.new_associated_images(handle)
+        return self._associated_images
+
+    def clear_associated_images(self):
+        self._associated_images = None
 
     @staticmethod
-    def grid_stack(grid: List[List[cp.ndarray]]):
-        return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1)
+    def grid_stack(grid: List[List[cp.ndarray]], device: Optional[Device]):
+        device = CuImageHandle.validate_device(device)
+        with cp.cuda.Device(device.device_id):
+            return cp.concatenate([cp.concatenate(row, axis=0) for row in grid], axis=1)
 
     @staticmethod
     def backend_dim(region: CuImage) -> Tuple[int, int]:
@@ -175,6 +217,8 @@ def array_shape(arr: cp.ndarray) -> Tuple[int, ...]:
         return arr.shape
 
     def release(self):
+        # todo - what's the better practice? This forces to free everything and can lock up the kernel for a couple
+        # todo - of seconds
         cp.get_default_memory_pool().free_all_blocks()
         cp.get_default_pinned_memory_pool().free_all_blocks()
 
@@ -183,12 +227,13 @@ def close_handle(self):
             self.handle.close()
             del self.handle
             self.handle = None
-            gc.collect()
         if self.dummy_handle is not None:
             self.dummy_handle.close()
             self.dummy_handle = None
+        # clear the cached map
+        self.clear_associated_images()
         self.release()
 
     @property
-    def device(self) -> ArrayDevice:
-        return ArrayDevice.CUDA
+    def device_type(self) -> ArrayDeviceType:
+        return ArrayDeviceType.CUDA
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
index c426cf2..1c28c33 100644
--- a/histoqc/wsi_handles/openslide_handle.py
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -3,12 +3,12 @@
 
 from .base import WSIImageHandle
 from histoqc.import_wrapper.openslide import openslide
-from typing import Union, Tuple, Sequence, List, Mapping
-from typing import cast
+from typing import Union, Tuple, Sequence, List, Mapping, cast, Optional
 from PIL.Image import Image as PILImage
 from .utils import rgba2rgb_pil
 from PIL import Image
-from histoqc.array_adapter import ArrayDevice
+from histoqc.array_adapter import ArrayDeviceType, Device
+import logging
 
 
 class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray]):
@@ -16,17 +16,25 @@ class OpenSlideHandle(WSIImageHandle[openslide.OpenSlide, PILImage, np.ndarray])
     _magnification_factor: str
     _has_bounding_box: bool
     fname: str
-    handle: openslide.OpenSlide
+    handle: Optional[openslide.OpenSlide]
+
+    @classmethod
+    def sanitize_device(cls, device: Optional[Device]):
+        if device is None:
+            return
+        if device.device_type is not ArrayDeviceType.CPU:
+            logging.warning(f"Expect CPU but got {device.device_type}. No effect."
+                            f"Check upstream device settings")
 
     def backend_rgba2rgb(self, img) -> PILImage:
         return rgba2rgb_pil(img, self.background_color)
 
-    def __init__(self, fname):
-        super().__init__(fname)
+    def __init__(self, fname: str, device_id: Optional[int] = None):
+        super().__init__(fname, device_id)
         self.handle = openslide.OpenSlide(fname)
         self._has_bounding_box = True
         self._bounding_box = self.__get_bounding_box()
-        
+
         # get magnification factor from wsi slide
         self._magnification_factor = self.handle.properties.get("openslide.objective-power") or \
             self.handle.properties.get("aperio.AppMag")
@@ -97,7 +105,8 @@ def comment(self) -> str:
         return self.handle.properties.get("openslide.comment", "NA")
 
     @classmethod
-    def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int]):
+    def region_resize_arr(cls, data: np.ndarray, new_size_wh: Tuple[int, int], device: Optional[Device]):
+        cls.sanitize_device(device)
         return np.array(Image.fromarray(data).resize(new_size_wh), copy=False)
 
     def get_thumbnail(self, new_dim):
@@ -116,7 +125,8 @@ def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage:
         return region
 
     @staticmethod
-    def backend_to_array(region: PILImage) -> np.ndarray:
+    def backend_to_array(region: PILImage, device: Optional[Device]) -> np.ndarray:
+        OpenSlideHandle.sanitize_device(device)
         return np.array(region)
 
     @staticmethod
@@ -134,7 +144,8 @@ def associated_images(self) -> Mapping[str, PILImage]:
         return self.handle.associated_images
 
     @staticmethod
-    def grid_stack(grid: List[List[np.ndarray]]):
+    def grid_stack(grid: List[List[np.ndarray]], device: Optional[Device]) -> np.ndarray:
+        OpenSlideHandle.sanitize_device(device)
         return np.concatenate([np.concatenate(row, axis=0) for row in grid], axis=1)
 
     @staticmethod
@@ -151,5 +162,8 @@ def close_handle(self):
         self.handle = None
 
     @property
-    def device(self) -> ArrayDevice:
-        return ArrayDevice.CPU
+    def device_type(self) -> ArrayDeviceType:
+        return ArrayDeviceType.CPU
+
+    def release(self):
+        ...

From f02d76bd42cfd3fe4a71634b57070c2851388213 Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Thu, 9 May 2024 08:42:57 -0400
Subject: [PATCH 5/6] Introduce Dask and Dask-GPU for CPU/GPU
 distributed/multiprocessing tasks.

---
 histoqc/BlurDetectionModule.py          |  11 +-
 histoqc/ClassificationModule.py         |  26 ++-
 histoqc/LightDarkModule.py              |   6 +-
 histoqc/__main__.py                     | 163 +++++++++--------
 histoqc/_log_conf.py                    | 205 +++++++++++++++++++++
 histoqc/_logging.py                     | 226 ++++++++++++++++++++++++
 histoqc/_pipeline.py                    |  84 +--------
 histoqc/_worker.py                      |  76 ++++----
 histoqc/array_adapter/adapter.py        |  31 ++--
 histoqc/import_wrapper/cupy_extra.py    |  11 +-
 histoqc/import_wrapper/dask_cuda.py     |  12 ++
 histoqc/wsi_handles/base.py             |   2 +-
 histoqc/wsi_handles/cuimage_handle.py   |  95 ++++++----
 histoqc/wsi_handles/openslide_handle.py |   3 +-
 requirements.txt                        |   1 +
 setup.py                                |   2 +-
 16 files changed, 706 insertions(+), 248 deletions(-)
 create mode 100644 histoqc/_log_conf.py
 create mode 100644 histoqc/_logging.py
 create mode 100644 histoqc/import_wrapper/dask_cuda.py

diff --git a/histoqc/BlurDetectionModule.py b/histoqc/BlurDetectionModule.py
index 176a132..4e173c6 100644
--- a/histoqc/BlurDetectionModule.py
+++ b/histoqc/BlurDetectionModule.py
@@ -18,12 +18,19 @@ def identifyBlurryRegions(s: BaseImage, params):
     adapter = s.image_handle.adapter
     blur_radius = int(params.get("blur_radius", 7))
     blur_threshold = float(params.get("blur_threshold", .1))
-    img = s.getImgThumb(params.get("image_work_size", "2.5x"))
-    img = adapter(rgb2gray)(img)
+    img_thumb = s.getImgThumb(params.get("image_work_size", "2.5x"))
+
+    img = adapter(rgb2gray)(img_thumb)
     # use the __abs__ interface
 
+    logging.debug(f"{s['filename']} - \tidentifyBlurryRegions Gray:"
+                  f" {img.max(), img.min(), blur_radius, blur_threshold}")
+
     img_laplace = abs(adapter(skimage.filters.laplace)(img))
+
+    logging.debug(f"{s['filename']} - \tidentifyBlurryRegions img_laplace: {img_laplace.max(), img_laplace.min()}")
     mask = adapter(skimage.filters.gaussian)(img_laplace, sigma=blur_radius) <= blur_threshold
+
     # for some reason resize takes a grayscale and produces a 3chan
     # Note: the reason you obtain a 3chan is that you specified a 3chan output shape
     mask_resized_shape = s.getImgThumb(s["image_work_size"]).shape[:2]
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index 6c7f81f..5096bc9 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -4,7 +4,7 @@
 import sys
 from histoqc.array_adapter import ArrayAdapter, Device
 from ast import literal_eval as make_tuple
-
+import traceback
 from distutils.util import strtobool
 
 from histoqc.BaseImage import printMaskHelper, BaseImage
@@ -81,9 +81,15 @@ def compute_rgb(img, params):
 def compute_laplace(img, params):
     laplace_ksize = int(params.get("laplace_ksize", 3))
     adapter = params["adapter"]
+    logging.debug(f"{__name__} - NaN check laplace img: {np.isnan(img).any()}")
+    # adapter.imsave(f"~/dbg/{params['filename']}_img_laplace.png", img)
     img_gray = adapter(rgb2gray)(img)
+    # adapter.imsave(f"~/dbg/{params['filename']}_gray_laplace.png", adapter(img_as_ubyte)(img_gray))
     #     return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None]
-    return adapter(laplace)(img_gray, ksize=laplace_ksize)[:, :, None]
+    logging.debug(f"{__name__} - NaN check laplace gray: {np.isnan(img_gray).any()}, {type(img)}")
+    feat = adapter(laplace)(img_gray, ksize=laplace_ksize)
+    logging.debug(f"{__name__} - NaN check laplace feat: {np.isnan(feat).any()}")
+    return feat[:, :, None]
 
 
 def compute_lbp(img, params):
@@ -157,11 +163,16 @@ def compute_frangi(img, params):
     sigmas = frangi_scale_range + (frangi_scale_step,)
 
     adapter: ArrayAdapter = params["adapter"]
+    # adapter.imsave(f"~/dbg/{params['filename']}_img_frangi.png", img)
+    logging.debug(f"{__name__} - NaN check frangi img: {np.isnan(img).any()}")
     img_gray = adapter(rgb2gray)(img)
+    # adapter.imsave(f"~/dbg/{params['filename']}_gray_frangi.png", adapter(img_as_ubyte)(img_gray))
+    logging.debug(f"{__name__} - NaN check frangi gray: {np.isnan(img_gray).any()}")
     feat = adapter(frangi)(img_gray, sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2,
                            black_ridges=frangi_black_ridges)
     # feat = frangi(rgb2gray(img), sigmas=sigmas, beta=frangi_beta1, gamma=frangi_beta2,
     #               black_ridges=frangi_black_ridges)
+    logging.debug(f"{__name__} - NaN check frangi feat: {np.isnan(feat).any()}")
     return feat[:, :, None]  # add singleton dimension
 
 
@@ -195,6 +206,7 @@ def byExampleWithFeatures(s: BaseImage, params):
 
     adapter = s.image_handle.adapter
     params['adapter'] = adapter
+    params['filename'] = s['filename']
     with params["lock"]:  # this lock is shared across all threads such that only one thread needs to train the model
         # then it is shared with all other modules
         if not params["shared_dict"].get("model_" + name, False):
@@ -239,6 +251,9 @@ def byExampleWithFeatures(s: BaseImage, params):
             # do stuff here with model_vals
             model_vals = np.vstack(model_vals)
             clf = RandomForestClassifier(n_jobs=-1)
+            # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}")
+            model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels,
+                                                                    device=Device.build(Device.DEVICE_CPU), copy=True)
             adapter(clf.fit)(model_vals, y=model_labels.ravel())
             # clf.fit(model_vals, y=model_labels.ravel())
             params["shared_dict"]["model_" + name] = clf
@@ -247,7 +262,12 @@ def byExampleWithFeatures(s: BaseImage, params):
     clf = params["shared_dict"]["model_" + name]
     img = s.getImgThumb(s["image_work_size"])
     feats = compute_features(img, params)
-    cal = adapter(clf.predict_proba)(feats.reshape(-1, feats.shape[2]))
+    logging.debug(f"{__name__} - {s['filename']} - NaN check img: {np.isnan(img).any()}")
+    logging.debug(f"{__name__} - {s['filename']} - NaN check feats: {np.isnan(feats).any()}")
+
+    feats = feats.reshape(-1, feats.shape[2])
+    feats = adapter.curate_arrays_device(feats, device=Device.build(Device.DEVICE_CPU), copy=True)
+    cal = adapter(clf.predict_proba)(feats)
     cal = cal.reshape(img.shape[0], img.shape[1], 2)
 
     mask = cal[:, :, 1] > thresh
diff --git a/histoqc/LightDarkModule.py b/histoqc/LightDarkModule.py
index 2312089..fc9f5c4 100644
--- a/histoqc/LightDarkModule.py
+++ b/histoqc/LightDarkModule.py
@@ -68,11 +68,11 @@ def getIntensityThresholdPercent(s: BaseImage, params):
     img = s.getImgThumb(s["image_work_size"])
     img_std = img.std(axis=2)
 
-    map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std)
+    map_std = adapter.and_(img_std > lower_std, img_std < upper_std)
 
     img = adapter(color.rgb2gray)(img)
-    region_between_interval = np.bitwise_and(img > lower_thresh, img < upper_thresh)
-    region_between_interval = np.bitwise_and(region_between_interval, map_std)
+    region_between_interval = adapter.and_(img > lower_thresh, img < upper_thresh)
+    region_between_interval = adapter.and_(region_between_interval, map_std)
 
     s["img_mask_" + name] = region_between_interval > 0
 
diff --git a/histoqc/__main__.py b/histoqc/__main__.py
index 8462cc1..b8ec752 100644
--- a/histoqc/__main__.py
+++ b/histoqc/__main__.py
@@ -7,26 +7,29 @@
 import os
 import sys
 import time
-from functools import partial
+# from functools import partial
 from typing import Tuple, Optional, List
 
+import dask.distributed
+
 from histoqc._pipeline import BatchedResultFile
-from histoqc._pipeline import MultiProcessingLogManager
+# from histoqc._pipeline import MultiProcessingLogManager
+from copy import deepcopy
+from histoqc._logging import LoggingSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, HDL_OUT_FIELD
 from histoqc._pipeline import load_pipeline
 from histoqc._pipeline import log_pipeline
 from histoqc._pipeline import move_logging_file_handler
-from histoqc._pipeline import setup_logging
+# from histoqc._pipeline import setup_logging
 from histoqc._pipeline import setup_plotting_backend
-from histoqc._worker import worker
-from histoqc._worker import worker_setup
-from histoqc._worker import worker_success
-from histoqc._worker import worker_error
-from histoqc._worker import PARAM_SHARE, device_assign, KEY_ASSIGN
+from histoqc._worker import (worker, worker_setup, worker_success, worker_error, worker_single_process,
+                             PARAM_SHARE, KEY_ASSIGN)
+
 from histoqc.config import read_config_template
 from histoqc.data import managed_pkg_data
 from histoqc.wsi_handles.constants import KEY_CUCIM
-from histoqc.array_adapter.adapter import cupy_installed
-from histoqc.import_wrapper.cupy_extra import cp
+from histoqc.import_wrapper.cupy_extra import cp, cupy_installed
+from histoqc.import_wrapper.dask_cuda import dask_cuda_installed, dask_cuda
+from dask.distributed import Client, as_completed
 
 
 def parse_config(args: argparse.Namespace) -> Tuple[configparser.ConfigParser, Optional[str]]:
@@ -48,15 +51,15 @@ def _get_device_list(n_proc: int):
 
 
 def parse_multiprocessing(args: argparse.Namespace,
-                          config: configparser.ConfigParser) -> Tuple[argparse.Namespace, List[int]]:
+                          config: configparser.ConfigParser) -> Tuple[argparse.Namespace, bool]:  # List[int],
     is_multiproc = args.nprocesses >= 0
     is_cuda = KEY_CUCIM in config["BaseImage.BaseImage"].get("handles", "")
 
     # if use cuda but without installation of dependencies - return
-    if not is_cuda or not cupy_installed():
-        return args, _get_device_list(args.nprocesses)
+    if not is_cuda or not (cupy_installed() and dask_cuda_installed()):
+        return args, False  # _get_device_list(args.nprocesses),
     # guard
-    assert is_cuda and cp is not None, f"Enable CUDA but cupy is not installed"
+    assert is_cuda and cp is not None and dask_cuda is not None, f"Enable CUDA but Dep is not installed"
     # set spawn
     if is_multiproc:
         multiprocessing.set_start_method("spawn", force=True)
@@ -68,8 +71,16 @@ def parse_multiprocessing(args: argparse.Namespace,
                         f"{args.nprocesses} > {num_devices}. Cutoff the number of processes to {num_devices}")
     args.nprocesses = min(args.nprocesses, num_devices)
     # device list --> if
-    device_list = _get_device_list(args.nprocesses)
-    return args, device_list
+    # device_list = _get_device_list(args.nprocesses)
+    return args, is_cuda
+
+
+def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]]):
+    assert nprocesses > 0, f"Expect number of processes > 0 to launch the cluster. Get {nprocesses}"
+    if not is_cuda:
+        return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, )
+    assert cp is not None and dask_cuda is not None
+    return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, n_workers=nprocesses)
 
 
 @managed_pkg_data
@@ -111,21 +122,35 @@ def main(argv=None):
     parser.add_argument('--symlink', metavar="TARGET_DIR",
                         help="create symlink to outdir in TARGET_DIR",
                         default=None)
+    parser.add_argument('--gpu_ids',
+                        type=int,
+                        nargs='+',
+                        help="GPU Devices to use (None=use all). Default: None",
+                        default=None)
     args = parser.parse_args(argv)
 
     # --- multiprocessing and logging setup -----------------------------------
-    # todo: move config parsing above the mpm initialization and set the start method accordingly
-    # multiprocessing.set_start_method("spawn")
-
-    setup_logging(capture_warnings=True, filter_warnings='ignore')
 
     # --- parse the pipeline configuration ------------------------------------
     config, conf_warn_msg = parse_config(args)
-    args, device_list = parse_multiprocessing(args, config)
-    mpm = multiprocessing.Manager()
-    lm = MultiProcessingLogManager('histoqc', manager=mpm)
+    args, is_cuda = parse_multiprocessing(args, config)
+
+    # --- create output directory and move log --------------------------------
+    args.outdir = os.path.expanduser(args.outdir)
+    os.makedirs(args.outdir, exist_ok=True)
+    # move_logging_file_handler(logging.getLogger(), args.outdir)
+
+    logging_setup = LoggingSetup(deepcopy(MAIN_CONF_BUILD),
+                                 deepcopy(WORKER_CONF_BUILD),
+                                 capture_warnings=True, filter_warnings='ignore')
+    # setup main proc logger config and file handler.
+    logging_setup.setup_main_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN,
+                                    handler_name=HDL_FILE, out_field=HDL_OUT_FIELD)
+
+    # Inherit from the root logger.
+    main_logger = logging.getLogger(__name__)
     if conf_warn_msg:
-        lm.logger.warning(conf_warn_msg)
+        main_logger.warning(conf_warn_msg)
 
     # --- provide models, pen and templates as fallbacks from package data ----
 
@@ -133,27 +158,23 @@ def main(argv=None):
 
     # --- load the process queue (error early) --------------------------------
 
-    _steps = log_pipeline(config, log_manager=lm)
+    _steps = log_pipeline(config, logger=main_logger)
     process_queue = load_pipeline(config)
 
     # --- check symlink target ------------------------------------------------
-
     if args.symlink is not None:
         if not os.path.isdir(args.symlink):
-            lm.logger.error("error: --symlink {args.symlink} is not a directory")
+            main_logger.error("error: --symlink {args.symlink} is not a directory")
             return -1
 
-    # --- create output directory and move log --------------------------------
-    args.outdir = os.path.expanduser(args.outdir)
-    os.makedirs(args.outdir, exist_ok=True)
-    move_logging_file_handler(logging.getLogger(), args.outdir)
-
     if BatchedResultFile.results_in_path(args.outdir):
         if args.force:
-            lm.logger.info("Previous run detected....overwriting (--force set)")
+            main_logger.info("Previous run detected....overwriting (--force set)")
         else:
-            lm.logger.info("Previous run detected....skipping completed (--force not set)")
-
+            main_logger.info("Previous run detected....skipping completed (--force not set)")
+    # for writing the results after workers succeed.
+    mpm = multiprocessing.Manager()
+    # results only utilize the lock and sync list from mpm. mpm is not saved.
     results = BatchedResultFile(args.outdir,
                                 manager=mpm,
                                 batch_size=args.batch,
@@ -189,9 +210,9 @@ def main(argv=None):
         pth = os.path.join(args.basepath, args.input_pattern[0])
         files = glob.glob(pth, recursive=True)
 
-    lm.logger.info("-" * 80)
+    main_logger.info("-" * 80)
     num_files = len(files)
-    lm.logger.info(f"Number of files detected by pattern:\t{num_files}")
+    main_logger.info(f"Number of files detected by pattern:\t{num_files}")
 
     # --- start worker processes ----------------------------------------------
 
@@ -199,66 +220,52 @@ def main(argv=None):
         'process_queue': process_queue,
         'config': config,
         'outdir': args.outdir,
-        'log_manager': lm,
-        'lock': mpm.Lock(),
-        PARAM_SHARE: mpm.dict(),
+        'lock': mpm.Lock(),  # todo transit to Dask's Lock
+        PARAM_SHARE: mpm.dict(),  # todo transit to Dask's Variable
         'num_files': num_files,
         'force': args.force,
     }
     # init the dict of device assignment
     _shared_state[PARAM_SHARE][KEY_ASSIGN] = mpm.dict()
     failed = mpm.list()
-    setup_plotting_backend(lm.logger)
+    setup_plotting_backend(main_logger)
 
     try:
-        # todo: for cuda --> must use spawn method if CUDA is enabled.
-        # todo: however a better memory management scheme should be tested.
-        # todo: since the single-processing cucim already outperforms multi-cpu counterpart in terms of runtime
-        # todo: at this moment we simply override the args.nprocesses
 
         if args.nprocesses > 0:
+            local_cluster = new_cluster('histoqc', is_cuda=is_cuda,
+                                        nprocesses=args.nprocesses, gpu_ids=args.gpu_ids)
+            with Client(local_cluster) as client:
+                # register the worker side
+                logging_setup.setup_client(client, forward_name="root")
 
-            with lm.logger_thread():
-                with multiprocessing.Pool(processes=args.nprocesses,
-                                          initializer=worker_setup,
-                                          initargs=(config, device_list, _shared_state)) as pool:
-                    try:
-                        for idx, file_name in enumerate(files):
-                            _ = pool.apply_async(
-                                func=worker,
-                                args=(idx, file_name),
-                                kwds=_shared_state,
-                                callback=partial(worker_success, result_file=results),
-                                error_callback=partial(worker_error, failed=failed),
-                            )
-
-                    finally:
-                        pool.close()
-                        pool.join()
+                # noinspection PyTypeChecker
+                futures_list = [client.submit(worker, idx, file_name, **_shared_state)
+                                for idx, file_name in enumerate(files)]
 
+                for future in as_completed(futures_list):
+                    try:
+                        base_img_finished = future.result()
+                    except Exception as exc:
+                        worker_error(exc, failed)
+                    else:
+                        worker_success(base_img_finished, result_file=results)
         else:
-            for idx, file_name in enumerate(files):
-                try:
-                    device_assign(device_list, _shared_state[PARAM_SHARE])
-                    _success = worker(idx, file_name, **_shared_state)
-                except Exception as exc:
-                    worker_error(exc, failed)
-                    continue
-                else:
-                    worker_success(_success, results)
+            worker_setup()
+            worker_single_process(files, failed, results, **_shared_state)
 
     except KeyboardInterrupt:
-        lm.logger.info("-----REQUESTED-ABORT-----\n")
+        main_logger.info("-----REQUESTED-ABORT-----\n")
 
     else:
-        lm.logger.info("----------Done-----------\n")
+        main_logger.info("----------Done-----------\n")
 
     finally:
-        lm.logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log),"
-                       " warnings are listed in warnings column in output")
+        main_logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log),"
+                         " warnings are listed in warnings column in output")
 
         for file_name, error, tb in failed:
-            lm.logger.info(f"{file_name}\t{error}\n{tb}")
+            main_logger.info(f"{file_name}\t{error}\n{tb}")
 
     if args.symlink is not None:
         origin = os.path.realpath(args.outdir)
@@ -268,9 +275,9 @@ def main(argv=None):
         )
         try:
             os.symlink(origin, target, target_is_directory=True)
-            lm.logger.info("Symlink to output directory created")
+            main_logger.info("Symlink to output directory created")
         except (FileExistsError, FileNotFoundError):
-            lm.logger.error(
+            main_logger.error(
                 f"Error creating symlink to output in '{args.symlink}', "
                 f"Please create manually: ln -s {origin} {target}"
             )
diff --git a/histoqc/_log_conf.py b/histoqc/_log_conf.py
new file mode 100644
index 0000000..ae384b8
--- /dev/null
+++ b/histoqc/_log_conf.py
@@ -0,0 +1,205 @@
+from typing_extensions import TypedDict, NotRequired, Literal, Self
+from typing import Dict, Type, Callable, Any, List, Optional, Sequence, get_args, cast
+import logging.config
+
+FormattersDict = TypedDict('FormattersDict', {
+    'format': str,
+    'datefmt': str,
+    'style': str,
+    'validate': str,
+    'defaults': str,
+    'class': NotRequired[str],
+
+}, total=False)
+
+FilterDict = TypedDict("FilterDict", {
+    '()': Type[logging.Filter] | Callable,
+    'args': NotRequired[Any],
+})
+
+HandlerDict = TypedDict("HandlerDict", {
+    "class": str,
+    "level": NotRequired[str],
+    "formatter": NotRequired[str],
+    "filters": NotRequired[List[str | logging.Filter]],
+    "filename": NotRequired[str],
+    "stream": NotRequired[str],
+    "mode": NotRequired[str],
+})
+
+
+class LoggerDict(TypedDict, total=False):
+    level: str
+    propagate: bool
+    filters: List[str | logging.Filter]
+    handlers: List[str]
+
+
+class ConfigDict(TypedDict):
+
+    version: int
+    formatters: NotRequired[Dict[str, FormattersDict]]
+    filters: NotRequired[Dict[str, FilterDict]]
+    handlers: NotRequired[Dict[str, HandlerDict]]
+    loggers: NotRequired[Dict[str, LoggerDict]]
+    root: NotRequired[LoggerDict]
+    incremental: NotRequired[bool]  # default False
+    disable_existing_loggers: NotRequired[bool]  # default True
+
+
+DEFAULT_CONSOLE = Literal['console']
+DEFAULT_FILE = Literal['file']
+TYPE_PREDEFINED_HANDLER = Literal[DEFAULT_CONSOLE, DEFAULT_FILE]
+
+DEFAULT_STD_OUT: str = 'ext://sys.stdout'
+DEFAULT_LOG_FN: str = 'error.log'
+DEFAULT_HANDLER_OUT_MAP: Dict[TYPE_PREDEFINED_HANDLER, str] = {
+    get_args(DEFAULT_CONSOLE)[0]: DEFAULT_STD_OUT,
+    get_args(DEFAULT_FILE)[0]: DEFAULT_LOG_FN,
+}
+
+TYPE_FORMAT_DEFAULT = Literal['default']
+TYPE_FORMAT_SIMPLE = Literal['simple']
+TYPE_PREDEFINED_FORMATTER = Literal[TYPE_FORMAT_DEFAULT, TYPE_FORMAT_SIMPLE]
+
+PREDEFINED_FORMATTER_MAP: Dict[TYPE_PREDEFINED_FORMATTER, FormattersDict] = {
+    get_args(TYPE_FORMAT_DEFAULT)[0]: {
+        'class': 'logging.Formatter',
+        'format': '%(asctime)s - %(levelname)s - %(message)s'
+    },
+    get_args(TYPE_FORMAT_SIMPLE)[0]: {
+        'class': 'logging.Formatter',
+        'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    }
+}
+
+LEVEL_DEBUG = Literal['DEBUG']
+LEVEL_INFO = Literal['INFO']
+LEVEL_WARNING = Literal['WARNING']
+LEVEL_ERROR = Literal['ERROR']
+LEVEL_CRITICAL = Literal['CRITICAL']
+LEVEL_NOTSET = Literal['NOTSET']
+LEVEL_TYPE = Literal[LEVEL_NOTSET, LEVEL_CRITICAL, LEVEL_ERROR, LEVEL_WARNING, LEVEL_INFO, LEVEL_DEBUG]
+
+
+def default_init_helper(d: Dict, key: str, factory: Callable) -> Any:
+    if key in d:
+        return
+    d[key] = factory()
+
+
+def init(attr: str, key: str, factory: Callable) -> Callable:
+    def decorator(func):
+        def wrapped(self, *args, **kwargs):
+            # Execute the helper function with self.some_attr, b, c
+            default_init_helper(getattr(self, attr), key, factory)
+            # Then execute the original function
+            return func(self, *args, **kwargs)
+        return wrapped
+    return decorator
+
+
+class LoggerConfigBuilder:
+    """Builder to create log conf schema.
+
+    Examples:
+        config_builder = LoggerConfigBuilder(version=1)
+        config = (config_builder
+              .add_formatter_by_type(formatter_type='simple')
+              .add_handler_by_type(handler_type='console', level='DEBUG', formatter_type='simple')
+              .add_handler_by_type(handler_type='logfile', level='ERROR', formatter_type='simple')
+              .add_logger(name='myapp', level='DEBUG', handlers=['console', 'logfile'])
+              .config)
+    """
+
+    def __init__(self, version: int):
+        self._config = ConfigDict(version=version, )
+
+    @staticmethod
+    def get_predefined_formatter(formatter_type) -> FormattersDict:
+        if formatter_type not in PREDEFINED_FORMATTER_MAP:
+            raise ValueError(f'Unknown predefined formatter: {formatter_type}')
+        return PREDEFINED_FORMATTER_MAP[formatter_type]
+
+    @staticmethod
+    def get_predefined_handler(handler_type: TYPE_PREDEFINED_HANDLER,
+                               level: LEVEL_TYPE = 'DEBUG',
+                               formatter: str = 'simple',
+                               target: Optional[str] = None,
+                               mode: str = 'w') -> HandlerDict:
+        target = target if target is not None else DEFAULT_HANDLER_OUT_MAP[handler_type]
+        if handler_type == get_args(DEFAULT_CONSOLE)[0]:
+            return {
+                'class': 'logging.StreamHandler',
+                'level': level,
+                'formatter': formatter,
+                'stream': target
+            }
+        elif handler_type == get_args(DEFAULT_FILE)[0]:
+            return {
+                'class': 'logging.FileHandler',
+                'level': level,
+                'formatter': formatter,
+                'filename': 'app.log',
+                'mode': mode,
+            }
+        raise ValueError(f'Unknown pre-defined handler type: {handler_type}')
+
+    @staticmethod
+    def create_logger(*, level: LEVEL_TYPE = 'DEBUG', handlers: List[str] = ('console', ),
+                      propagate: bool = False,
+                      filters: Optional[List[str | logging.Filter]] = None) -> LoggerDict:
+        if filters is None:
+            filters = []
+        return LoggerDict(level=level, handlers=handlers, propagate=propagate, filters=filters)
+
+    @init("config", 'formatters', dict)
+    def add_formatter_by_type(self, *, formatter_type: TYPE_PREDEFINED_FORMATTER):
+        # default_init(self.config, 'formatters', dict)
+        self._config['formatters'][formatter_type] = self.get_predefined_formatter(formatter_type)
+        return self
+
+    @init("config", 'formatters', dict)
+    def add_formatter(self, *, name: str, formatter_dict: FormattersDict):
+        # default_init(self.config, 'formatters', dict)
+        self._config['formatters'][name] = formatter_dict
+        return self
+
+    @init("config", 'handlers', dict)
+    def add_handler(self, *, name: str, handler_dict: HandlerDict):
+        self._config['handlers'][name] = handler_dict
+        return self
+
+    @init("config", 'handlers', dict)
+    def add_handler_by_type(self, *, handler_type: TYPE_PREDEFINED_HANDLER, level: LEVEL_TYPE,
+                            formatter: str):
+        self._config['handlers'][handler_type] = self.get_predefined_handler(handler_type, level, formatter)
+        return self
+
+    @init("config", 'loggers', dict)
+    def add_logger(self, *, name: str, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False,
+                   filters: Optional[Sequence[str | logging.Filter]] = None) -> Self:
+        self._config['loggers'][name] = self.create_logger(level=level, handlers=handlers,
+                                                           propagate=propagate,
+                                                           filters=filters)
+        return self
+
+    @init("config", 'root', dict)
+    def add_root(self, *, level: LEVEL_TYPE, handlers: List[str], propagate: bool = False,
+                 filters: Optional[Sequence[str | logging.Filter]] = None) -> Self:
+        self._config['root'] = self.create_logger(level=level, handlers=handlers,
+                                                  propagate=propagate,
+                                                  filters=filters)
+        return self
+
+    @init("config", 'handlers', dict)
+    def set_handler_target(self, handler_name: str, out_field: str, out_value: str):
+        assert handler_name in self.config['handlers']
+        handler_dict: HandlerDict = self.config['handlers'][handler_name]
+        out_field = cast(Literal['filename'], out_field)
+        handler_dict[out_field] = out_value
+        return self
+
+    @property
+    def config(self):
+        return self._config
diff --git a/histoqc/_logging.py b/histoqc/_logging.py
new file mode 100644
index 0000000..7bf7a8f
--- /dev/null
+++ b/histoqc/_logging.py
@@ -0,0 +1,226 @@
+from __future__ import annotations
+import multiprocessing
+import dask
+import threading
+import os
+from contextlib import contextmanager
+from logging.handlers import QueueHandler
+from typing import Dict, cast, Optional, List
+from typing_extensions import Literal, Self
+import warnings
+from dask.distributed import WorkerPlugin
+import logging
+from logging.config import dictConfig
+from dask.distributed import get_worker
+from histoqc._log_conf import LoggerConfigBuilder
+DEFAULT_LOG_FN = 'error.log'
+FMT_DFT = cast(Literal['default'], 'default')
+HDL_CONSOLE = 'console'
+HDL_FILE = 'file'
+HDL_OUT_FIELD = 'filename'
+MAIN_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
+                   .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT)
+                   .add_handler_by_type(handler_type=HDL_FILE, level='WARNING', formatter=FMT_DFT)
+                   .add_root(level="INFO", handlers=[HDL_CONSOLE, HDL_FILE])
+                   .set_handler_target(handler_name=HDL_FILE, out_field=HDL_OUT_FIELD, out_value=DEFAULT_LOG_FN))
+
+WORKER_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
+                     .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT)
+                     .add_root(level="INFO", handlers=[HDL_CONSOLE]))
+
+
+def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore'):
+    # configure warnings too...
+    filter_type = Literal["default", "error", "ignore", "always", "module", "once"]
+    warnings.filterwarnings(cast(filter_type, filter_warnings))
+    logging.captureWarnings(capture_warnings)
+
+
+class DaskLogHandler(logging.Handler):
+    """Custom Handler, which emits the topic/message from builtin logging.Logger to dask's centralized logging.
+
+    Could be useful for future if we need to migrate from builtin logger to dask's logger
+    """
+    topic: str
+
+    def emit(self, record):
+        worker = get_worker()
+        log_entry = self.format(record)
+        worker.log_event(self.topic, log_entry)
+
+    def __init__(self, topic: str = 'logs'):
+        super().__init__()
+        self.topic = topic
+
+
+class WorkerInitializer(WorkerPlugin):
+    """Worker Initializer
+
+    """
+    worker_config: Dict
+    logger_names: List[Optional[str]]
+
+    def __init__(self, worker_config: Dict, capture_warnings: bool = True, filter_warnings: str = 'ignore'):
+        self.worker_config = worker_config
+        self.capture_warnings = capture_warnings
+        self.filter_warnings = filter_warnings
+
+    def setup(self, worker):
+        logging.config.dictConfig(self.worker_config)
+        handle_warning(capture_warnings=self.capture_warnings, filter_warnings=self.filter_warnings)
+
+    @classmethod
+    def build(cls, worker_config: Dict,
+              capture_warnings: bool = True, filter_warnings: str = 'ignore') -> Self:
+        return cls(worker_config, capture_warnings, filter_warnings)
+
+
+class LoggingSetup:
+    _plugin: WorkerPlugin
+    _main_build: LoggerConfigBuilder
+    _worker_build_list = List[LoggerConfigBuilder]
+
+    def __init__(self, main_build: LoggerConfigBuilder,
+                 *worker_builds: LoggerConfigBuilder,
+                 capture_warnings: bool = True, filter_warnings: str = 'ignore'):
+        self._main_build = main_build
+        self._worker_build_list = list(worker_builds)
+        self._capture_warnings = capture_warnings
+        self._filter_warnings = filter_warnings
+
+    def is_main_proc(self):
+        return multiprocessing.current_process().name == "MainProcess"
+
+    @classmethod
+    def client_setup_plugin(cls, client: dask.distributed.Client,
+                            conf_build: LoggerConfigBuilder,
+                            capture_warnings: bool,
+                            filter_warnings: str) -> dask.distributed.Client:
+        plugin = WorkerInitializer.build(conf_build.config, capture_warnings=capture_warnings,
+                                         filter_warnings=filter_warnings)
+        client.register_plugin(plugin)
+        return client
+
+    def setup_client(self, client: dask.distributed.Client, forward_name: Optional[str]):
+        # todo fill-in
+        if not self.is_main_proc():
+            return client
+        # main logger setup
+        for wb in self._worker_build_list:
+            client = self.client_setup_plugin(client, wb, self._capture_warnings, self._filter_warnings)
+        # forward to the main process logger
+        client.forward_logging(logger_name=forward_name)
+        return client
+
+    def curate_filename(self, *, output_dir: Optional[str], fname: Optional[str],
+                        handler_name: Optional[str], out_field: Optional[str]):
+        # do nothing if any of below is None:
+        if handler_name is None or fname is None or out_field is None:
+            return
+        dest = fname
+        if output_dir is not None:
+            os.makedirs(output_dir, exist_ok=True)
+            dest = os.path.join(output_dir, fname)
+        assert dest is not None and handler_name is not None and out_field is not None
+        self._main_build = self._main_build.set_handler_target(handler_name=handler_name,
+                                                               out_field=out_field, out_value=dest)
+        return self._main_build
+
+    def setup_main_logger(self, *, output_dir: Optional[str],
+                          fname: Optional[str],
+                          handler_name: Optional[str],
+                          out_field: Optional[str]):
+        if not self.is_main_proc():
+            return
+        # main logger
+        self._main_build = self.curate_filename(output_dir=output_dir, fname=fname,
+                                                handler_name=handler_name, out_field=out_field)
+
+        logging.config.dictConfig(self._main_build.config)
+
+    def setup(self, *,
+              client: dask.distributed.Client,
+              forward_name: Optional[str],
+              output_dir: Optional[str],
+              fname: str = DEFAULT_LOG_FN,
+              handler_name: Optional[str] = HDL_FILE,
+              out_field: Optional[str] = HDL_OUT_FIELD
+              ):
+        if not self.is_main_proc():
+            return
+        self.setup_main_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field)
+        self.setup_client(client, forward_name)
+
+
+class MultiProcessingLogManager:
+    """Adapted from Andreas Poehlmann's implementation.
+
+    Under the hood of dask, worker loggers can be forwarded
+        to client directly for both local and distributed clusters. No need
+
+    """
+    def __init__(self, logger_name, *, manager):
+        """create a MultiProcessingLogManager
+
+        Note: this uses a multiprocessing Queue to correctly transfer
+          logging information from worker processes to the main
+          process logging instance
+
+        Parameters
+        ----------
+        logger_name : str
+            the name of the logger instance
+        manager : multiprocessing.Manager
+            the mp Manager instance used for creating sharable context
+        """
+        self._logger_name = logger_name
+        self._log_queue = manager.Queue()
+        self._log_thread_active = False
+
+    @property
+    def is_main_process(self):
+        return multiprocessing.current_process().name == "MainProcess"
+
+    @property
+    def logger(self):
+        """returns the logger instance"""
+        if self.is_main_process:
+            return logging.getLogger(self._logger_name)
+        else:
+            root = logging.getLogger()
+            if not root.hasHandlers():
+                qh = QueueHandler(self._log_queue)
+                root.setLevel(logging.INFO)
+                root.addHandler(qh)
+                # note: this should be revisited and set by the main process
+                warnings.filterwarnings('ignore')
+                logging.captureWarnings(True)
+            return root
+
+    @contextmanager
+    def logger_thread(self):
+        """context manager for multiprocess logging
+
+        Note: this starts the thread responsible for handing the log records
+          emitted by child processes to the main logger instance
+        """
+        assert self.is_main_process
+        assert not self._log_thread_active  # not reentrant...
+        self._log_thread_active = True
+
+        def process_queue(q, ln):
+            main_logger = logging.getLogger(ln)
+            while True:
+                log_record = q.get()
+                if log_record is None:
+                    break
+                main_logger.handle(log_record)
+
+        lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name))
+        lt.start()
+        try:
+            yield self
+        finally:
+            self._log_queue.put(None)
+            lt.join()
+            self._log_thread_active = False
diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py
index 6c1a4e1..b49f5a9 100644
--- a/histoqc/_pipeline.py
+++ b/histoqc/_pipeline.py
@@ -9,14 +9,11 @@
 import os
 import platform
 import shutil
-import threading
 import warnings
 from contextlib import ExitStack
-from contextlib import contextmanager
 from contextlib import nullcontext
 from importlib import import_module
 from logging.config import dictConfig
-from logging.handlers import QueueHandler
 from typing_extensions import Literal
 from typing import cast
 
@@ -47,7 +44,7 @@ def setup_logging(*, capture_warnings, filter_warnings):
         'handlers': {
             'console': {
                 'class': 'logging.StreamHandler',
-                'level': 'INFO',
+                'level': 'DEBUG',  # todo
                 'formatter': 'default',
             },
             'logfile': {
@@ -102,90 +99,21 @@ def move_logging_file_handler(logger, destination):
         logger.addHandler(new_handler)
 
 
-class MultiProcessingLogManager:
-
-    def __init__(self, logger_name, *, manager):
-        """create a MultiProcessingLogManager
-
-        Note: this uses a multiprocessing Queue to correctly transfer
-          logging information from worker processes to the main
-          process logging instance
-
-        Parameters
-        ----------
-        logger_name : str
-            the name of the logger instance
-        manager : multiprocessing.Manager
-            the mp Manager instance used for creating sharable context
-        """
-        self._logger_name = logger_name
-        self._log_queue = manager.Queue()
-        self._log_thread_active = False
-
-    @property
-    def is_main_process(self):
-        return multiprocessing.current_process().name == "MainProcess"
-
-    @property
-    def logger(self):
-        """returns the logger instance"""
-        if self.is_main_process:
-            return logging.getLogger(self._logger_name)
-        else:
-            root = logging.getLogger()
-            if not root.hasHandlers():
-                qh = QueueHandler(self._log_queue)
-                root.setLevel(logging.INFO)
-                root.addHandler(qh)
-                # note: this should be revisited and set by the main process
-                warnings.filterwarnings('ignore')
-                logging.captureWarnings(True)
-            return root
-
-    @contextmanager
-    def logger_thread(self):
-        """context manager for multiprocess logging
-
-        Note: this starts the thread responsible for handing the log records
-          emitted by child processes to the main logger instance
-        """
-        assert self.is_main_process
-        assert not self._log_thread_active  # not reentrant...
-        self._log_thread_active = True
-
-        def process_queue(q, ln):
-            main_logger = logging.getLogger(ln)
-            while True:
-                log_record = q.get()
-                if log_record is None:
-                    break
-                main_logger.handle(log_record)
-
-        lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name))
-        lt.start()
-        try:
-            yield
-        finally:
-            self._log_queue.put(None)
-            lt.join()
-            self._log_thread_active = False
-
-
-def log_pipeline(config, log_manager):
+def log_pipeline(config, logger: logging.Logger):
     """log the pipeline information
 
     Parameters
     ----------
     config : configparser.ConfigParser
-    log_manager : MultiProcessingLogManager
+    logger : logger obj to log the messages
     """
-    assert log_manager.is_main_process
+    assert multiprocessing.current_process().name == "MainProcess"
     steps = config.get(section='pipeline', option='steps').splitlines()
 
-    log_manager.logger.info("the pipeline will use these steps:")
+    logger.info("the pipeline will use these steps:")
     for process in steps:
         mod_name, func_name = process.split('.')
-        log_manager.logger.info(f"\t\t{mod_name}\t{func_name}")
+        logger.info(f"\t\t{mod_name}\t{func_name}")
     return steps
 
 
diff --git a/histoqc/_worker.py b/histoqc/_worker.py
index 163a998..d55d5d2 100644
--- a/histoqc/_worker.py
+++ b/histoqc/_worker.py
@@ -1,52 +1,36 @@
 """histoqc worker functions"""
-import multiprocessing
+import logging
 import os
 import shutil
 import traceback
 from histoqc.BaseImage import BaseImage
-from histoqc._pipeline import load_pipeline
+from histoqc._pipeline import BatchedResultFile
 from histoqc._pipeline import setup_plotting_backend
-from typing import Dict, List, Optional
-from multiprocessing import managers
-
+from typing import List, Optional
 KEY_ASSIGN: str = 'device_assign'
 PARAM_SHARE: str = 'shared_dict'
 
 
 # --- worker functions --------------------------------------------------------
-def id_assign_helper(device_id_list: List[int], assign_dict: managers.DictProxy):
-    pid = os.getpid()
-    for device_id in device_id_list:
-        if device_id not in assign_dict.values():
-            assign_dict[pid] = device_id
-            return
-
-
-def device_assign(device_id_list: List[int], shared_dict: managers.DictProxy):
-    """Initializer to configure each worker with a specific GPU."""
-    shared_dict[KEY_ASSIGN] = shared_dict.get(KEY_ASSIGN, None)
-    assert shared_dict[KEY_ASSIGN] is not None
-    assert KEY_ASSIGN in shared_dict
-    id_assign_helper(device_id_list, shared_dict[KEY_ASSIGN])
 
-
-def worker_setup(c, device_id_list: List[int], state: Dict):
+# c: configparser.Parser, cuda: bool, device_id_list: List[int], state: Dict
+def worker_setup():
     """needed for multiprocessing worker setup"""
     setup_plotting_backend()
-    shared_dict = state[PARAM_SHARE]
-    load_pipeline(config=c)
-    device_assign(device_id_list, shared_dict)
+    # shared_dict = state[PARAM_SHARE]
+    # load_pipeline(config=c)
+    # device_assign(cuda, device_id_list, shared_dict)
 
 
 def worker(idx, file_name, *,
-           process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force):
+           process_queue, config, outdir, lock, shared_dict, num_files, force):
     """pipeline worker function"""
-
+    logger = logging.getLogger()
     # --- output directory preparation --------------------------------
     fname_outdir = os.path.join(outdir, os.path.basename(file_name))
     if os.path.isdir(fname_outdir):  # directory exists
         if not force:
-            log_manager.logger.warning(
+            logger.warning(
                 f"{file_name} already seems to be processed (output directory exists),"
                 " skipping. To avoid this behavior use --force"
             )
@@ -57,11 +41,13 @@ def worker(idx, file_name, *,
     # create output dir
     os.makedirs(fname_outdir)
 
-    log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")
-    device_id = shared_dict[KEY_ASSIGN].get(os.getpid(), None)
+    logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")
+    # let Dask handle the device visibility/assignment
+    device_id = 0  # shared_dict[KEY_ASSIGN].get(os.getpid(), None)
+    # logger.info(f"{__name__} - {file_name}: Device ID: {dict(shared_dict[KEY_ASSIGN])}")
     if device_id is None:
-        log_manager.logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id."
-                                   f"Default: use 0 for CUDA devices.")
+        logger.warning(f"{__name__}: {file_name}\t\t{idx+1} of {num_files}: Unspecified device_id."
+                       f"Default: use 0 for CUDA devices.")
     s: Optional[BaseImage] = None
 
     try:
@@ -74,13 +60,15 @@ def worker(idx, file_name, *,
             s["completed"].append(process.__name__)
     except Exception as exc:
         # reproduce histoqc error string
+        logger.info(f"{file_name}: Error Block")
         if s is not None:
-            s.image_handle.release()
-        print(f"DBG: {__name__}: {exc}")
+            # s.image_handle.release()
+            s.image_handle.close()
+        # print(f"DBG: {__name__}: {exc}")
         _oneline_doc_str = exc.__doc__.replace('\n', '') if exc.__doc__ is not None else ''
         err_str = f"{exc.__class__} {_oneline_doc_str} {exc}"
         trace_string = traceback.format_exc()
-        log_manager.logger.error(
+        logger.error(
             f"{file_name} - Error analyzing file (skipping): \t {err_str}. Traceback: {trace_string}"
         )
         if exc.__traceback__.tb_next is not None:
@@ -114,7 +102,7 @@ def worker_success(s, result_file):
         result_file.write_line("\t".join([_fields, _warnings]))
 
 
-def worker_error(e, failed):
+def worker_error(e, failed: List):
     """error callback"""
     if hasattr(e, '__histoqc_err__'):
         file_name, err_str, tb = e.__histoqc_err__
@@ -124,3 +112,21 @@ def worker_error(e, failed):
         #   around the worker function
         file_name, err_str, tb = "N/A", f"error outside of pipeline {e!r}", None
     failed.append((file_name, err_str, tb))
+
+
+def worker_flow_for_file(idx: int, file_name: str,
+                         failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]:
+    try:
+        base_image = worker(idx, file_name, **kwargs)
+    except Exception as exc:
+        base_image = None
+        worker_error(exc, failed)
+    else:
+        worker_success(base_image, results)
+    return base_image
+
+
+def worker_single_process(files, failed: List, results: BatchedResultFile, **kwargs) -> Optional[BaseImage]:
+    for idx, file_name in enumerate(files):
+        return worker_flow_for_file(idx, file_name, failed, results, **kwargs)
+
diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py
index 52b3a15..bb6d71f 100644
--- a/histoqc/array_adapter/adapter.py
+++ b/histoqc/array_adapter/adapter.py
@@ -5,7 +5,7 @@
 from numbers import Number
 from typing import Callable, Mapping, Tuple, Optional, Any, Iterable, Dict
 from typing_extensions import Self, TypeGuard
-from histoqc.import_wrapper.cupy_extra import cupy as cp
+from histoqc.import_wrapper.cupy_extra import cupy as cp, cupy_installed
 from enum import Enum
 import logging
 import functools
@@ -14,14 +14,6 @@
 import re
 
 
-def cupy_installed() -> bool:
-    try:
-        import cupy
-        return True
-    except ImportError:
-        return False
-
-
 class ArrayDeviceType(Enum):
     CPU: str = 'cpu'
     CUDA: str = 'cuda'
@@ -134,6 +126,17 @@ class ArrayAdapter(Callable):
 
     id: int
 
+    @classmethod
+    def get_device(cls, arr: TYPE_ARRAY):
+        if not cls.is_array(arr):
+            logging.warning(f"{__name__} {type(arr)} is not an Array")
+            return None
+        if cls.is_numpy(arr):
+            return Device.build(Device.DEVICE_CPU)
+        assert cls.is_cupy(arr), f"Not a Cupy array: {type(arr)}"
+        device_id = arr.device.id
+        return Device(ArrayDeviceType.CUDA, device_id)
+
     @staticmethod
     def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]:
         return isinstance(arr, np.ndarray)
@@ -192,11 +195,16 @@ def _move_to_device(cls,
                         device: Optional[Device], copy: bool = False) -> TYPE_ARRAY:
         # structural match > py3.10
         if device is None or not cls.is_array(arr):
+            logging.debug(f"Not Array")
             return arr
         assert device is not None
         if device.is_cpu():
+            logging.debug(f"Move to CPU. InputType: {type(arr)}. Input Device: {cls.get_device(arr)},"
+                          f" shape{arr.shape}")
             return ArrayAdapter.to_numpy(arr, copy=copy)
         elif device.is_cuda():
+            logging.debug(f"Move to GPU: {device}. InputType: {type(arr)}. Input Device: {cls.get_device(arr)}"
+                          f" shape{arr.shape}")
             return ArrayAdapter.to_cupy(arr, device, copy=copy)
         raise ValueError(f"Unsupported device: {device}")
 
@@ -276,12 +284,15 @@ def unified_call(cls,
                      output_device: Optional[Device],
                      data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
         # use input_device to override the current device, if not None
-        data = cls.curate_arrays_device(data, device=input_device, copy=False)
+        data = cls.curate_arrays_device(data, device=input_device, copy=True)
         # if data is None --> use input device.
         # if input_device is None, by default will invoke GPU interface
         input_type = cls.array_device_type(data) if data is not None else input_device
         # attempt to fetch the op, revert to CPU if GPU impl is not available (func=GPU impl, func_device=cuda
         func, func_device = cls.get_api(cpu_func, func_map, input_type)
+        logging.debug(f"{__name__}: Call Adapter for {cpu_func} with "
+                      f"In Device: {input_device}, Out Device: {output_device}."
+                      f"Mapped to: {func} and actual input device: {func_device}")
         func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True)
 
         curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True)
diff --git a/histoqc/import_wrapper/cupy_extra.py b/histoqc/import_wrapper/cupy_extra.py
index 33d6586..02eb76e 100644
--- a/histoqc/import_wrapper/cupy_extra.py
+++ b/histoqc/import_wrapper/cupy_extra.py
@@ -1,8 +1,17 @@
+from __future__ import annotations
+
 try:
     import cupy
     # cupy.cuda.set_allocator(None)
-    cupy.cuda.set_allocator(cupy.cuda.MemoryPool().malloc)
 except ImportError:
     cupy = None
 finally:
     cp = cupy
+
+
+def cupy_installed() -> bool:
+    try:
+        import cupy
+        return True
+    except ImportError:
+        return False
diff --git a/histoqc/import_wrapper/dask_cuda.py b/histoqc/import_wrapper/dask_cuda.py
new file mode 100644
index 0000000..8b98acf
--- /dev/null
+++ b/histoqc/import_wrapper/dask_cuda.py
@@ -0,0 +1,12 @@
+try:
+    import dask_cuda
+except ImportError:
+    dask_cuda = None
+
+
+def dask_cuda_installed() -> bool:
+    try:
+        import dask_cuda
+        return True
+    except ImportError:
+        return False
diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py
index 56bf82e..1811cbb 100644
--- a/histoqc/wsi_handles/base.py
+++ b/histoqc/wsi_handles/base.py
@@ -144,7 +144,7 @@ def comment(self) -> str:
         ...
 
     @abstractmethod
-    def get_thumbnail(self, new_dim) -> Union[ARRAY, Backend]:
+    def get_thumbnail(self, new_dim) -> PILImage:
         ...
 
     @abstractmethod
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
index d9c6a70..8fd081d 100644
--- a/histoqc/wsi_handles/cuimage_handle.py
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
-import skimage.util
+import sys
+
+# import skimage.util
 from PIL.Image import Image as PILImage
 from cucim.clara import CuImage
 from .base import WSIImageHandle
@@ -16,6 +18,7 @@
 from histoqc.array_adapter import ArrayDeviceType, Device
 from types import MappingProxyType
 import logging
+import os
 
 
 DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA)
@@ -28,7 +31,7 @@ class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
     _associated_images: Optional[Mapping]
 
     # TODO: standalone parser of vendor information
-    dummy_handle: Optional[openslide.OpenSlide]
+    dummy_handle_spill: Optional[openslide.OpenSlide]
 
     def backend_rgba2rgb(self, img: CuImage) -> CuImage:
         # todo: it appears that CuImage does not take care of the alpha channel at all.
@@ -46,7 +49,7 @@ def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device:
         with cp.cuda.Device(device.device_id):
             w, h, *_ = new_size_wh
             arr = cp.array(data)
-            return c_skimage.transform.resize(arr, output_shape=(h, w))
+            return c_skimage.transform.resize(arr, output_shape=(h, w), order=3, anti_aliasing=True)
 
     def __init__(self, fname: str, device_id: Optional[int]):
         super().__init__(fname, device_id)
@@ -54,28 +57,29 @@ def __init__(self, fname: str, device_id: Optional[int]):
         self.handle = CuImage(fname)
         # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a
         # todo - native unified metadata interface for different vendors.
-        self.dummy_handle = openslide.OpenSlide(fname)
-        logging.debug(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}")
+        # todo - workaround as memory spilling option
+        self.dummy_handle_spill = openslide.OpenSlide(fname)
+        logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}")
 
     @LazyProperty
     def background_color(self):
-        return f"#{self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+        return f"#{self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
 
     @LazyProperty
     def bounding_box(self):
         dim_width, dim_height = self.dimensions
-        x = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0))
-        y = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0))
-        width = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width))
-        height = int(self.dummy_handle.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height))
+        x = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 0))
+        y = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 0))
+        width = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, dim_width))
+        height = int(self.dummy_handle_spill.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, dim_height))
         return x, y, width, height
 
     @LazyProperty
     def has_bounding_box(self):
-        return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties
-                and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle.properties
-                and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle.properties
-                and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle.properties
+        return (openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_X in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_WIDTH in self.dummy_handle_spill.properties
+                and openslide.PROPERTY_NAME_BOUNDS_HEIGHT in self.dummy_handle_spill.properties
                 )
 
     @LazyProperty
@@ -84,8 +88,8 @@ def dimensions(self):
 
     @LazyProperty
     def magnification(self):
-        return self.dummy_handle.properties.get("openslide.objective-power") or \
-            self.dummy_handle.properties.get("aperio.AppMag")
+        return self.dummy_handle_spill.properties.get("openslide.objective-power") or \
+            self.dummy_handle_spill.properties.get("aperio.AppMag")
 
     @property
     def level_count(self):
@@ -101,21 +105,21 @@ def level_downsamples(self):
 
     @property
     def vendor(self):
-        return self.dummy_handle.properties.get("openslide.vendor", "NA")
+        return self.dummy_handle_spill.properties.get("openslide.vendor", "NA")
 
     @property
     def mpp_x(self):
-        return self.dummy_handle.properties.get("openslide.mpp-x", "NA")
+        return self.dummy_handle_spill.properties.get("openslide.mpp-x", "NA")
 
     @property
     def mpp_y(self):
-        return self.dummy_handle.properties.get("openslide.mpp-y", "NA")
+        return self.dummy_handle_spill.properties.get("openslide.mpp-y", "NA")
 
     @property
     def comment(self):
-        return self.dummy_handle.properties.get("openslide.comment", "NA")
+        return self.dummy_handle_spill.properties.get("openslide.comment", "NA")
 
-    def get_thumbnail(self, new_dim):
+    def get_thumbnail(self, new_dim) -> cp.ndarray:
         """Get thumbnail
 
         Args:
@@ -125,23 +129,36 @@ def get_thumbnail(self, new_dim):
 
         """
         # from openslide
-        with cp.cuda.Device(self.device.device_id):
+        with (cp.cuda.Device(self.device.device_id)):
             downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, new_dim)))
             level = self.get_best_level_for_downsample(downsample)
             target_w, target_h = (x // int(downsample) for x in self.dimensions)
-            aspect_ratio = self.dimensions[0] / self.dimensions[1]
+
             # resize
             thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level]))
             try:
-                thumb_cp = cp.array(thumb, copy=True)
-                target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
-                resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), clip=True)
-                return c_skimage.util.img_as_ubyte(resized)
+                thumb_cp: cp.ndarray = cp.array(thumb, copy=False)
+                # todo: for reproducibility -> openslide uses LANCZOS filter in PILLOW
+                #    but the exact detail of LANCZOS resampling (e.g., kernel size) is not specified in documentation.
+                #    need to implement our own LANCZOS resampling for cupy later.
+                # aspect_ratio = self.dimensions[0] / self.dimensions[1]
+                # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
+                # resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), order=1,
+                #                                      anti_aliasing=False)
+                # return c_skimage.util.img_as_ubyte(resized)
+                resized_pil = Image.fromarray(thumb_cp.get()
+                                              ).convert("RGB").resize((target_w, target_h),
+                                                                      resample=Image.Resampling.LANCZOS)
             except Exception:
-                logging.warning(f"{__name__} - {self.fname}: OOM on {self.device.device_id}. {traceback.format_exc()}")
-                thumb_np = np.array(thumb, copy=True)
-                thumb_np = skimage.util.img_as_ubyte(thumb_np)
-                return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB")
+                # self.reload()
+                logging.error(f"{__name__} - {self.fname}: OOM on {self.device.device_id}."
+                                f"Use CPU"
+                                f"Error Message Dumped: {traceback.format_exc()}")
+                # thumb_np = np.array(thumb, copy=True)
+                # thumb_np = skimage.util.img_as_ubyte(thumb_np)
+                # return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB")
+                resized_pil = self.dummy_handle_spill.get_thumbnail(new_dim).convert("RGB")
+            return cp.array(resized_pil, copy=False)
 
     def get_best_level_for_downsample(self, down_factor: float) -> int:
         """Return the largest level that's smaller than the target downsample factor, consistent with openslide.
@@ -161,13 +178,16 @@ def get_best_level_for_downsample(self, down_factor: float) -> int:
 
     def region_backend(self, location, level, size, **kwargs) -> CuImage:
         with cp.cuda.Device(self.device.device_id):
-            return self.handle.read_region(location=location, level=level, size=size, **kwargs)
+            return self.handle.read_region(location=location, level=level, size=size,
+                                           num_workers=max(1, os.cpu_count() // 2),
+                                           **kwargs)
 
     @staticmethod
     def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray:
         device = CuImageHandle.validate_device(device)
         with cp.cuda.Device(device.device_id):
             result = cp.array(region, copy=False)
+
             logging.debug(f"{__name__} - {device.device_id} == {result.device}")
             return result
 
@@ -223,16 +243,21 @@ def release(self):
         cp.get_default_pinned_memory_pool().free_all_blocks()
 
     def close_handle(self):
+        logging.debug(f"{__name__}: {self.fname} - closed")
         if hasattr(self, "handle") and self.handle is not None:
             self.handle.close()
             del self.handle
             self.handle = None
-        if self.dummy_handle is not None:
-            self.dummy_handle.close()
-            self.dummy_handle = None
+        if self.dummy_handle_spill is not None:
+            self.dummy_handle_spill.close()
+            self.dummy_handle_spill = None
         # clear the cached map
         self.clear_associated_images()
+        # self.release()
+
+    def reload(self):
         self.release()
+        self.handle = CuImage(self.fname)
 
     @property
     def device_type(self) -> ArrayDeviceType:
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
index 1c28c33..3225583 100644
--- a/histoqc/wsi_handles/openslide_handle.py
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -41,6 +41,7 @@ def __init__(self, fname: str, device_id: Optional[int] = None):
         
         # get background color 
         self._background_color = f"#{self.handle.properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, 'ffffff')}"
+        logging.info(f"{__name__}: {fname}: Create OpenSlideHandle at {device_id}. {self.device}")
 
     def __get_bounding_box(self) -> Tuple[int, int, int, int]:
         (dim_width, dim_height) = self.handle.dimensions
@@ -127,7 +128,7 @@ def backend_to_pil(region: Union[PILImage, np.ndarray]) -> PILImage:
     @staticmethod
     def backend_to_array(region: PILImage, device: Optional[Device]) -> np.ndarray:
         OpenSlideHandle.sanitize_device(device)
-        return np.array(region)
+        return np.array(region, copy=False)
 
     @staticmethod
     def array_to_numpy(arr: np.ndarray) -> np.ndarray:
diff --git a/requirements.txt b/requirements.txt
index 30754ef..0bc584d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ requests~=2.28.1
 Pillow~=9.4.0
 setuptools~=65.6.3
 shapely~=2.0.1
+dask~=2024.1.1
diff --git a/setup.py b/setup.py
index e6a092b..49a6691 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
     setup_requires=['setuptools_scm'],
     extras_require={
         "dicom": ["wsidicom"],
-        "cucim": ["cucim", "cupy"],
+        "cucim": ["cucim", "cupy", "dask-cuda"],
     },
     package_data={
         'histoqc.config': ['*.ini'],

From 761e3d3af8d3a14bbe2d6a6c187f6d9ab7288e53 Mon Sep 17 00:00:00 2001
From: CielAl <cielmercy@gmail.com>
Date: Fri, 10 May 2024 02:24:47 -0400
Subject: [PATCH 6/6] misc fix - matplotlib backend specified in worker init

---
 histoqc/BaseImage.py                      |   6 +-
 histoqc/ClassificationModule.py           | 136 +++++++++++++---------
 histoqc/__main__.py                       |  81 ++++++++-----
 histoqc/_pipeline.py                      |  20 ----
 histoqc/_worker.py                        |   2 +-
 histoqc/{_logging.py => _worker_setup.py} |  44 +++++--
 histoqc/array_adapter/adapter.py          |  35 ++++--
 histoqc/wsi_handles/base.py               |  16 ++-
 histoqc/wsi_handles/cuimage_handle.py     |  75 +++++++-----
 histoqc/wsi_handles/openslide_handle.py   |   4 +-
 rmm_log.txt                               |   2 +
 11 files changed, 256 insertions(+), 165 deletions(-)
 rename histoqc/{_logging.py => _worker_setup.py} (86%)
 create mode 100644 rmm_log.txt

diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py
index 97fe60c..0a8ab55 100644
--- a/histoqc/BaseImage.py
+++ b/histoqc/BaseImage.py
@@ -53,7 +53,7 @@ def image_handle(self) -> Optional[WSIImageHandle]:
     def image_handle(self, image_handle: WSIImageHandle):
         self._image_handle = image_handle
 
-    def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None):
+    def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None, num_threads: Optional[int] = 1):
         dict.__init__(self)
         # init
         self._device_id = device_id
@@ -62,7 +62,9 @@ def __init__(self, fname, fname_outdir, params, device_id: Optional[int] = None)
 
         # dynamically load wsi image handle
         try:
-            self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname, handles, device_id=device_id)
+            self.image_handle: WSIImageHandle = WSIImageHandle.build_handle(fname,
+                                                                            handles, device_id=device_id,
+                                                                            num_threads=num_threads)
         except Exception:
             trace_string = traceback.format_exc()
             logging.error(f"{__name__}: {fname} -- Error Creating Handle - Traceback: {trace_string}")
diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py
index 5096bc9..ef72dae 100644
--- a/histoqc/ClassificationModule.py
+++ b/histoqc/ClassificationModule.py
@@ -4,7 +4,7 @@
 import sys
 from histoqc.array_adapter import ArrayAdapter, Device
 from ast import literal_eval as make_tuple
-import traceback
+from dask.distributed import Lock
 from distutils.util import strtobool
 
 from histoqc.BaseImage import printMaskHelper, BaseImage
@@ -188,6 +188,86 @@ def compute_features(img, params):
     return np.concatenate(feats, axis=2)
 
 
+def new_clf_model(s: BaseImage, params):
+    name = params.get("name", "classTask")
+    logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}")
+    adapter = s.image_handle.adapter
+    model_vals = []
+    model_labels = adapter.sync(np.empty([0, 1]))
+    nsamples_per_example = float(params.get("nsamples_per_example", -1))
+
+    for ex in params["examples"].splitlines():
+        ex = re.split(r'(?<!\W[A-Za-z]):(?!\\)', ex)  # workaround for windows: don't split on i.e. C:\
+        img = io.imread(ex[0])
+        eximg = compute_features(img, params)
+        eximg = eximg.reshape(-1, eximg.shape[2])
+
+        # read mask as grayscale images
+        mask = io.imread(ex[1], as_gray=True)
+        mask = adapter.sync(mask)
+        # convert grayscale images into binary images if images are not binary format
+        if mask.dtype.kind != 'b':
+            # warning log
+            msg = f"Mask file '{ex[1]}' is not a binary image. Automatically converting to binary..."
+            logging.warning(s['filename'] + ' - ' + msg)
+            s["warnings"].append(msg)
+            # convert to binary
+            mask = adapter(img_as_bool)(mask)
+
+        mask = mask.reshape(-1, 1)
+
+        if nsamples_per_example != -1:  # sub sampling required
+            nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]
+                                                                               * nsamples_per_example)
+            idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
+            eximg = eximg[idxkeep, :]
+            mask = mask[idxkeep]
+
+        model_vals.append(eximg)
+        # again any component in vstack's input is cupy.ndarray will result in a cupy.ndarray
+        # but we explicitly sync the device of mask and model_labels anyway
+        model_labels = np.vstack((model_labels, mask))
+
+    model_vals = np.vstack(model_vals)
+    clf = RandomForestClassifier(n_jobs=-1)
+    # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}")
+    model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels,
+                                                            device=Device.build(Device.DEVICE_CPU), copy=True)
+    adapter(clf.fit)(model_vals, y=model_labels.ravel())
+    # clf.fit(model_vals, y=model_labels.ravel()
+    logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
+    return clf
+
+
+def _cache_clf(s: BaseImage, params, var_name):  # f"model_{name}"
+    # name = params.get("name", "classTask")
+    # model_var = Variable(var_name)
+
+    with Lock(var_name):  # this lock is shared across all threads such that only one thread needs to train the model
+        # then it is shared with all other modules
+        if not params["shared_dict"].get(var_name, False):
+            clf = new_clf_model(s, params)
+            logging.info(f"{s['filename']} - saving to cache...")
+            params["shared_dict"][var_name] = clf
+            logging.info(f"{s['filename']} - cached...")
+
+    logging.info(f"{s['filename']} - Lock Released...")
+    # try:
+    #     clf = model_var.get()
+    # except ValueError:
+    #     clf = new_clf_model(s, params)
+    #     model_var.set(clf)
+
+
+def get_clf(s: BaseImage, params):
+    name = params.get("name", "classTask")
+    var_name = f"model_{name}"
+    _cache_clf(s, params, var_name)
+    # clf = Variable(var_name).get()  # params["shared_dict"]["model_" + name]
+    clf = params["shared_dict"]["model_" + name]
+    return clf
+
+
 def byExampleWithFeatures(s: BaseImage, params):
     name = params.get("name", "classTask")
     logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
@@ -207,59 +287,9 @@ def byExampleWithFeatures(s: BaseImage, params):
     adapter = s.image_handle.adapter
     params['adapter'] = adapter
     params['filename'] = s['filename']
-    with params["lock"]:  # this lock is shared across all threads such that only one thread needs to train the model
-        # then it is shared with all other modules
-        if not params["shared_dict"].get("model_" + name, False):
-
-            logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}")
-
-            model_vals = []
-            model_labels = adapter.sync(np.empty([0, 1]))
-
-            for ex in params["examples"].splitlines():
-                ex = re.split(r'(?<!\W[A-Za-z]):(?!\\)', ex)  # workaround for windows: don't split on i.e. C:\
-                img = io.imread(ex[0])
-                eximg = compute_features(img, params)
-                eximg = eximg.reshape(-1, eximg.shape[2])
-
-                # read mask as grayscale images
-                mask = io.imread(ex[1], as_gray=True)
-                mask = adapter.sync(mask)
-                # convert grayscale images into binary images if images are not binary format 
-                if mask.dtype.kind != 'b':
-                    # warning log
-                    msg = f"Mask file '{ex[1]}' is not a binary image. Automatically converting to binary..."
-                    logging.warning(s['filename'] + ' - ' + msg)
-                    s["warnings"].append(msg)
-                    # convert to binary
-                    mask = adapter(img_as_bool)(mask)
-
-                mask = mask.reshape(-1, 1)
-
-                if nsamples_per_example != -1:  # sub sampling required
-                    nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]
-                                                                                       * nsamples_per_example)
-                    idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
-                    eximg = eximg[idxkeep, :]
-                    mask = mask[idxkeep]
-
-                model_vals.append(eximg)
-                # again any component in vstack's input is cupy.ndarray will result in a cupy.ndarray
-                # but we explicitly sync the device of mask and model_labels anyway
-                model_labels = np.vstack((model_labels, mask))
-
-            # do stuff here with model_vals
-            model_vals = np.vstack(model_vals)
-            clf = RandomForestClassifier(n_jobs=-1)
-            # logging.warning(f"{__name__}: {s['filename']} - {np.unique(model_labels.ravel())}")
-            model_vals, model_labels = adapter.curate_arrays_device(model_vals, model_labels,
-                                                                    device=Device.build(Device.DEVICE_CPU), copy=True)
-            adapter(clf.fit)(model_vals, y=model_labels.ravel())
-            # clf.fit(model_vals, y=model_labels.ravel())
-            params["shared_dict"]["model_" + name] = clf
-            logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
 
-    clf = params["shared_dict"]["model_" + name]
+    clf = get_clf(s, params)
+    logging.info(f"{__name__} - {s['filename']} Infer with the trained model.")
     img = s.getImgThumb(s["image_work_size"])
     feats = compute_features(img, params)
     logging.debug(f"{__name__} - {s['filename']} - NaN check img: {np.isnan(img).any()}")
diff --git a/histoqc/__main__.py b/histoqc/__main__.py
index b8ec752..a537948 100644
--- a/histoqc/__main__.py
+++ b/histoqc/__main__.py
@@ -11,16 +11,16 @@
 from typing import Tuple, Optional, List
 
 import dask.distributed
-
+from logging.config import dictConfig
 from histoqc._pipeline import BatchedResultFile
 # from histoqc._pipeline import MultiProcessingLogManager
 from copy import deepcopy
-from histoqc._logging import LoggingSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, HDL_OUT_FIELD
+from histoqc._worker_setup import WorkerSetup, MAIN_CONF_BUILD, WORKER_CONF_BUILD, DEFAULT_LOG_FN, HDL_FILE, \
+    HDL_OUT_FIELD, setup_plotting_backend, LoggerConfigBuilder, FMT_DFT, HDL_CONSOLE
 from histoqc._pipeline import load_pipeline
 from histoqc._pipeline import log_pipeline
-from histoqc._pipeline import move_logging_file_handler
+# from histoqc._pipeline import move_logging_file_handler
 # from histoqc._pipeline import setup_logging
-from histoqc._pipeline import setup_plotting_backend
 from histoqc._worker import (worker, worker_setup, worker_success, worker_error, worker_single_process,
                              PARAM_SHARE, KEY_ASSIGN)
 
@@ -75,12 +75,19 @@ def parse_multiprocessing(args: argparse.Namespace,
     return args, is_cuda
 
 
-def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]]):
+def new_cluster(name: str, is_cuda: bool, nprocesses: int, gpu_ids: Optional[List[int]],
+                nvlink: bool, spill_limit, rmm_pool, num_threads: int):
     assert nprocesses > 0, f"Expect number of processes > 0 to launch the cluster. Get {nprocesses}"
     if not is_cuda:
-        return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, )
+        return dask.distributed.LocalCluster(name=name, n_workers=nprocesses, threads_per_worker=num_threads)
     assert cp is not None and dask_cuda is not None
-    return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids, n_workers=nprocesses)
+    return dask_cuda.LocalCUDACluster(name=name, CUDA_VISIBLE_DEVICES=gpu_ids,
+                                      jit_unspill=True,
+                                      threads_per_worker=num_threads,
+                                      device_memory_limit=spill_limit,
+                                      n_workers=nprocesses,
+                                      enable_nvlink=nvlink,
+                                      rmm_pool_size=rmm_pool, )  # rmm_maximum_pool_size="10GB"
 
 
 @managed_pkg_data
@@ -127,6 +134,14 @@ def main(argv=None):
                         nargs='+',
                         help="GPU Devices to use (None=use all). Default: None",
                         default=None)
+    parser.add_argument('--nvlink', action='store_true',
+                        help='whether to enable NVLINK. Only applied when CUDA cluster is launched')
+    parser.add_argument('--rmm_pool', type=str, default="10GB",
+                        help='Pool size. Only applied when CUDA cluster is launched')
+    parser.add_argument('--spill_limit', type=float, default=0.5,
+                        help='Percentage threshold of GPU memory before spill to host memory')
+    parser.add_argument('--num_threads', type=int, default=4,
+                        help='# of threads per worker.')
     args = parser.parse_args(argv)
 
     # --- multiprocessing and logging setup -----------------------------------
@@ -140,15 +155,18 @@ def main(argv=None):
     os.makedirs(args.outdir, exist_ok=True)
     # move_logging_file_handler(logging.getLogger(), args.outdir)
 
-    logging_setup = LoggingSetup(deepcopy(MAIN_CONF_BUILD),
-                                 deepcopy(WORKER_CONF_BUILD),
-                                 capture_warnings=True, filter_warnings='ignore')
+    logging_setup = WorkerSetup(deepcopy(MAIN_CONF_BUILD),
+                                deepcopy(WORKER_CONF_BUILD),
+                                capture_warnings=True, filter_warnings='ignore')
     # setup main proc logger config and file handler.
-    logging_setup.setup_main_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN,
-                                    handler_name=HDL_FILE, out_field=HDL_OUT_FIELD)
+    logging_setup.setup_mainprocess_logger(output_dir=args.outdir, fname=DEFAULT_LOG_FN,
+                                           handler_name=HDL_FILE, out_field=HDL_OUT_FIELD)
 
     # Inherit from the root logger.
+
     main_logger = logging.getLogger(__name__)
+    main_logger.addHandler(logging.StreamHandler(sys.stdout))
+
     if conf_warn_msg:
         main_logger.warning(conf_warn_msg)
 
@@ -220,7 +238,7 @@ def main(argv=None):
         'process_queue': process_queue,
         'config': config,
         'outdir': args.outdir,
-        'lock': mpm.Lock(),  # todo transit to Dask's Lock
+        'lock': dask.distributed.Lock('x'),  # mpm.Lock(),  # todo transit to Dask's Lock
         PARAM_SHARE: mpm.dict(),  # todo transit to Dask's Variable
         'num_files': num_files,
         'force': args.force,
@@ -231,29 +249,30 @@ def main(argv=None):
     setup_plotting_backend(main_logger)
 
     try:
-
         if args.nprocesses > 0:
             local_cluster = new_cluster('histoqc', is_cuda=is_cuda,
-                                        nprocesses=args.nprocesses, gpu_ids=args.gpu_ids)
-            with Client(local_cluster) as client:
-                # register the worker side
-                logging_setup.setup_client(client, forward_name="root")
-
-                # noinspection PyTypeChecker
-                futures_list = [client.submit(worker, idx, file_name, **_shared_state)
-                                for idx, file_name in enumerate(files)]
-
-                for future in as_completed(futures_list):
-                    try:
-                        base_img_finished = future.result()
-                    except Exception as exc:
-                        worker_error(exc, failed)
-                    else:
-                        worker_success(base_img_finished, result_file=results)
+                                        nprocesses=args.nprocesses, gpu_ids=args.gpu_ids,
+                                        nvlink=args.nvlink, spill_limit=args.spill_limit,
+                                        rmm_pool=args.rmm_pool, num_threads=args.num_threads)
+            with local_cluster:
+                with Client(local_cluster) as client:
+                    # register the worker side
+                    logging_setup.setup_client(client, forward_name="root")
+
+                    # noinspection PyTypeChecker
+                    futures_list = [client.submit(worker, idx, file_name, **_shared_state)
+                                    for idx, file_name in enumerate(files)]
+
+                    for future in as_completed(futures_list):
+                        try:
+                            base_img_finished = future.result()
+                        except Exception as exc:
+                            worker_error(exc, failed)
+                        else:
+                            worker_success(base_img_finished, result_file=results)
         else:
             worker_setup()
             worker_single_process(files, failed, results, **_shared_state)
-
     except KeyboardInterrupt:
         main_logger.info("-----REQUESTED-ABORT-----\n")
 
diff --git a/histoqc/_pipeline.py b/histoqc/_pipeline.py
index b49f5a9..21c792f 100644
--- a/histoqc/_pipeline.py
+++ b/histoqc/_pipeline.py
@@ -7,7 +7,6 @@
 import logging
 import multiprocessing
 import os
-import platform
 import shutil
 import warnings
 from contextlib import ExitStack
@@ -117,25 +116,6 @@ def log_pipeline(config, logger: logging.Logger):
     return steps
 
 
-# --- worker process helpers ------------------------------------------
-
-def setup_plotting_backend(logger=None):
-    """loads the correct matplotlib backend
-
-    Parameters
-    ----------
-    logger :
-        the logging.Logger instance
-    """
-    import matplotlib
-    if platform.system() != "Windows" and not os.environ.get('DISPLAY'):
-        if logger is not None:
-            logger.info('no display found. Using non-interactive Agg backend')
-        matplotlib.use('Agg')
-    else:
-        matplotlib.use('TkAgg')
-
-
 class BatchedResultFile:
     """BatchedResultFile encapsulates the results writing
 
diff --git a/histoqc/_worker.py b/histoqc/_worker.py
index d55d5d2..8c29d54 100644
--- a/histoqc/_worker.py
+++ b/histoqc/_worker.py
@@ -5,7 +5,7 @@
 import traceback
 from histoqc.BaseImage import BaseImage
 from histoqc._pipeline import BatchedResultFile
-from histoqc._pipeline import setup_plotting_backend
+from histoqc._worker_setup import setup_plotting_backend
 from typing import List, Optional
 KEY_ASSIGN: str = 'device_assign'
 PARAM_SHARE: str = 'shared_dict'
diff --git a/histoqc/_logging.py b/histoqc/_worker_setup.py
similarity index 86%
rename from histoqc/_logging.py
rename to histoqc/_worker_setup.py
index 7bf7a8f..0de9472 100644
--- a/histoqc/_logging.py
+++ b/histoqc/_worker_setup.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 import multiprocessing
+import platform
+
 import dask
 import threading
 import os
@@ -19,14 +21,13 @@
 HDL_FILE = 'file'
 HDL_OUT_FIELD = 'filename'
 MAIN_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
-                   .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT)
                    .add_handler_by_type(handler_type=HDL_FILE, level='WARNING', formatter=FMT_DFT)
-                   .add_root(level="INFO", handlers=[HDL_CONSOLE, HDL_FILE])
+                   .add_root(level="INFO", handlers=[HDL_FILE])
                    .set_handler_target(handler_name=HDL_FILE, out_field=HDL_OUT_FIELD, out_value=DEFAULT_LOG_FN))
 
 WORKER_CONF_BUILD = (LoggerConfigBuilder(version=1).add_formatter_by_type(formatter_type=FMT_DFT)
-                     .add_handler_by_type(handler_type=HDL_CONSOLE, level='DEBUG', formatter=FMT_DFT)
-                     .add_root(level="INFO", handlers=[HDL_CONSOLE]))
+                     .add_handler_by_type(handler_type=HDL_CONSOLE, level='INFO', formatter=FMT_DFT)
+                     .add_root(level="DEBUG", handlers=[HDL_CONSOLE]))  # HDL_CONSOLE
 
 
 def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore'):
@@ -36,6 +37,25 @@ def handle_warning(capture_warnings: bool = True, filter_warnings: str = 'ignore
     logging.captureWarnings(capture_warnings)
 
 
+def setup_plotting_backend(logger=None, distributed: bool = False):
+    """loads the correct matplotlib backend
+
+    Parameters
+    ----------
+    logger :
+        the logging.Logger instance
+    distributed:
+        whether a distributed framework is applied, which forces to enable Agg
+    """
+    import matplotlib
+    if distributed or (platform.system() != "Windows" and not os.environ.get('DISPLAY')):
+        if logger is not None:
+            logger.info('no display found. Using non-interactive Agg backend')
+        matplotlib.use('Agg')
+    else:
+        matplotlib.use('TkAgg')
+
+
 class DaskLogHandler(logging.Handler):
     """Custom Handler, which emits the topic/message from builtin logging.Logger to dask's centralized logging.
 
@@ -68,6 +88,7 @@ def __init__(self, worker_config: Dict, capture_warnings: bool = True, filter_wa
     def setup(self, worker):
         logging.config.dictConfig(self.worker_config)
         handle_warning(capture_warnings=self.capture_warnings, filter_warnings=self.filter_warnings)
+        setup_plotting_backend(logging.getLogger(), distributed=True)
 
     @classmethod
     def build(cls, worker_config: Dict,
@@ -75,7 +96,7 @@ def build(cls, worker_config: Dict,
         return cls(worker_config, capture_warnings, filter_warnings)
 
 
-class LoggingSetup:
+class WorkerSetup:
     _plugin: WorkerPlugin
     _main_build: LoggerConfigBuilder
     _worker_build_list = List[LoggerConfigBuilder]
@@ -126,10 +147,10 @@ def curate_filename(self, *, output_dir: Optional[str], fname: Optional[str],
                                                                out_field=out_field, out_value=dest)
         return self._main_build
 
-    def setup_main_logger(self, *, output_dir: Optional[str],
-                          fname: Optional[str],
-                          handler_name: Optional[str],
-                          out_field: Optional[str]):
+    def setup_mainprocess_logger(self, *, output_dir: Optional[str],
+                                 fname: Optional[str],
+                                 handler_name: Optional[str],
+                                 out_field: Optional[str]):
         if not self.is_main_proc():
             return
         # main logger
@@ -148,7 +169,7 @@ def setup(self, *,
               ):
         if not self.is_main_proc():
             return
-        self.setup_main_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field)
+        self.setup_mainprocess_logger(output_dir=output_dir, fname=fname, handler_name=handler_name, out_field=out_field)
         self.setup_client(client, forward_name)
 
 
@@ -224,3 +245,6 @@ def process_queue(q, ln):
             self._log_queue.put(None)
             lt.join()
             self._log_thread_active = False
+
+# --- worker process helpers ------------------------------------------
+
diff --git a/histoqc/array_adapter/adapter.py b/histoqc/array_adapter/adapter.py
index bb6d71f..cd9767d 100644
--- a/histoqc/array_adapter/adapter.py
+++ b/histoqc/array_adapter/adapter.py
@@ -12,6 +12,13 @@
 from operator import and_, or_, xor, add, mul, sub, matmul, floordiv, truediv
 import skimage
 import re
+import asyncio
+
+
+async def async_write_image(filename, arr: np.ndarray, **kwargs):
+    loop = asyncio.get_running_loop()
+    # noinspection PyArgumentList
+    await loop.run_in_executor(None, skimage.io.imsave, filename, arr, **kwargs)
 
 
 class ArrayDeviceType(Enum):
@@ -142,7 +149,7 @@ def is_numpy(arr: TYPE_NP) -> TypeGuard[TYPE_NP]:
         return isinstance(arr, np.ndarray)
 
     @staticmethod
-    def is_cupy(arr: TYPE_CP) -> TypeGuard[TYPE_CP]:
+    def is_cupy(arr) -> TypeGuard[TYPE_CP]:
         return cupy_installed() and isinstance(arr, cp.ndarray)
 
     @staticmethod
@@ -150,6 +157,7 @@ def to_numpy(arr: TYPE_ARRAY, copy: bool = False) -> TYPE_NP:
         if ArrayAdapter.is_numpy(arr) or isinstance(arr, Number):
             return np.array(arr, copy=copy)
         assert ArrayAdapter.is_cupy(arr)
+        # logging.debug(f"{__name__}: CUPY->NUMPY detected. Expect latency.")
         return arr.get()
 
     @staticmethod
@@ -157,6 +165,10 @@ def to_cupy(arr: TYPE_ARRAY | Number, device: Device, copy: bool = False) -> TYP
         assert isinstance(arr, Number) or (ArrayAdapter.is_array(arr) and cupy_installed()), \
             f"arr must be array and cupy must be installed. {type(arr)}, {cupy_installed()}"
         assert device is not None and isinstance(device, Device) and device.is_cuda(), f"{device} is not CUDA device"
+        if ArrayAdapter.is_cupy(arr) and arr.device.id == device.device_id and not copy:
+            # logging.warning(f"{__name__}: Same Device. Return self:"
+            #                 f" {device.device_id}. {cp.cuda.runtime.deviceGetPCIBusId(arr.device.id)}")
+            return arr
         with cp.cuda.Device(device.device_id):
             return cp.array(arr, copy=copy)
 
@@ -252,7 +264,7 @@ def get_api(cpu_func: Callable,
                 func_map: Mapping[Callable, Callable],
                 device: Optional[Device]) -> Tuple[Callable, Device]:
         if device is None:
-            logging.warning(f"Device unspecified in both input data and input device. Try: gpu:0")
+            logging.debug(f"Device unspecified in both input data and input device. Try: gpu:0")
             device = Device.build(Device.DEVICE_CUDA)
         if device.is_cpu():
             return cpu_func, device
@@ -284,7 +296,7 @@ def unified_call(cls,
                      output_device: Optional[Device],
                      data: TYPE_ARRAY, *args, **kwargs) -> TYPE_ARRAY:
         # use input_device to override the current device, if not None
-        data = cls.curate_arrays_device(data, device=input_device, copy=True)
+        data = cls.curate_arrays_device(data, device=input_device, copy=False)
         # if data is None --> use input device.
         # if input_device is None, by default will invoke GPU interface
         input_type = cls.array_device_type(data) if data is not None else input_device
@@ -293,18 +305,18 @@ def unified_call(cls,
         logging.debug(f"{__name__}: Call Adapter for {cpu_func} with "
                       f"In Device: {input_device}, Out Device: {output_device}."
                       f"Mapped to: {func} and actual input device: {func_device}")
-        func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=True)
+        func_in: TYPE_ARRAY = cls.curate_arrays_device(data, device=func_device, copy=False)
 
-        curated_args = cls.curate_arrays_device(*args, device=func_device, copy=True)
+        curated_args = cls.curate_arrays_device(*args, device=func_device, copy=False)
         curated_kwargs = dict()
         for k, v in kwargs.items():
-            curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=True)
+            curated_kwargs[k] = cls.curate_arrays_device(v, device=func_device, copy=False)
 
         output = cls.call_helper(func_in, func_device, func, *curated_args, **curated_kwargs)
         # only move the output around if the output is an array
         if isinstance(output, tuple):
-            return cls.curate_arrays_device(*output, device=output_device, copy=True)
-        return cls.curate_arrays_device(output, device=output_device, copy=True)
+            return cls.curate_arrays_device(*output, device=output_device, copy=False)
+        return cls.curate_arrays_device(output, device=output_device, copy=False)
 
     @classmethod
     def _validate_device(cls, device: Optional[Device | str | int]) -> Optional[Device]:
@@ -454,8 +466,11 @@ def xor(self, arr1: TYPE_ARRAY, arr2: TYPE_ARRAY) -> TYPE_ARRAY:
                                                op=xor)
 
     @classmethod
-    def imsave(cls, filename: str, arr: TYPE_ARRAY, **kwargs):
+    def imsave(cls, filename: str, arr: TYPE_ARRAY, asynch: bool = True, **kwargs):
         logging.debug(f"{__name__}: SHAPE DBG {arr.shape}")
         arr = cls.curate_arrays_device(arr, device=Device.build(Device.DEVICE_CPU), copy=True)
         logging.debug(f"{__name__}: TYPE DBG {type(arr)}")
-        return skimage.io.imsave(filename, arr, **kwargs)
+        if not asynch:
+            skimage.io.imsave(filename, arr, **kwargs)
+            return
+        asyncio.run(async_write_image(filename, arr, **kwargs))
diff --git a/histoqc/wsi_handles/base.py b/histoqc/wsi_handles/base.py
index 1811cbb..4a0e6ab 100644
--- a/histoqc/wsi_handles/base.py
+++ b/histoqc/wsi_handles/base.py
@@ -23,6 +23,7 @@ class WSIImageHandle(ABC, Generic[T, Backend, ARRAY]):
     fname: str
     _adapter: ArrayAdapter
     _device: Device
+    _num_threads: int
 
     @property
     def device(self) -> Device:
@@ -317,14 +318,15 @@ def parse_wsi_handles(handle_list: str | List[str], delimiter: str,
 
     @classmethod
     def __create_handle(cls, fname: str,
-                        handle_class_list: List[Callable[[str, Optional[int]], "WSIImageHandle"]],
-                        device_id: Optional[int]) -> "WSIImageHandle":
+                        handle_class_list: List[Callable[[str, Optional[int], Optional[int]], "WSIImageHandle"]],
+                        device_id: Optional[int],
+                        num_threads: Optional[int]) -> "WSIImageHandle":
         image_handle = None
         assert fname is None or os.path.exists(fname), f"fname should either be None or point to an existing file"
         for handle_class in handle_class_list:
             # noinspection PyBroadException
             try:
-                image_handle = handle_class(fname, device_id)
+                image_handle = handle_class(fname, device_id, num_threads)
                 break
             except Exception:
                 # current wsi handle class doesn't support this file
@@ -340,18 +342,20 @@ def __create_handle(cls, fname: str,
 
     @classmethod
     @final
-    def build_handle(cls, fname: str, handles: str, device_id: Optional[int]) -> "WSIImageHandle":
+    def build_handle(cls, fname: str, handles: str, device_id: Optional[int],
+                     num_threads: Optional[int]) -> "WSIImageHandle":
         # get handles list
         module_list, attr_list = cls.parse_wsi_handles(handles, delimiter=HANDLE_DELIMITER, wsi_handle_dict=WSI_HANDLES)
         handle_class_list = dynamic_import(module_list, attr_list, return_first=False)
-        image_handle = cls.__create_handle(fname, handle_class_list, device_id)
+        image_handle = cls.__create_handle(fname, handle_class_list, device_id, num_threads)
         return image_handle
 
-    def __init__(self, fname: str, device_id: Optional[int]):
+    def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int]):
         self.fname = fname
         self._device = Device(self.device_type, device_id)
         self._adapter = ArrayAdapter.build(input_device=self._device, output_device=self._device,
                                            contingent_device=self._device)
+        self._num_threads = num_threads if num_threads is not None else 1
 
     @abstractmethod
     def close_handle(self):
diff --git a/histoqc/wsi_handles/cuimage_handle.py b/histoqc/wsi_handles/cuimage_handle.py
index 8fd081d..f331586 100644
--- a/histoqc/wsi_handles/cuimage_handle.py
+++ b/histoqc/wsi_handles/cuimage_handle.py
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
-import sys
-
-# import skimage.util
+import skimage.util
 from PIL.Image import Image as PILImage
 from cucim.clara import CuImage
 from .base import WSIImageHandle
@@ -18,7 +16,7 @@
 from histoqc.array_adapter import ArrayDeviceType, Device
 from types import MappingProxyType
 import logging
-import os
+# import os
 
 
 DEFAULT_DEVICE = Device.build(Device.DEVICE_CUDA)
@@ -26,6 +24,9 @@
 
 class CuImageHandle(WSIImageHandle[CuImage, CuImage, cp.ndarray]):
 
+    # todo: implement GPU-accelerated LANCZOS filter
+    USE_LANCZOS: bool = False
+
     handle: Optional[CuImage]
     fname: str
     _associated_images: Optional[Mapping]
@@ -51,15 +52,16 @@ def region_resize_arr(cls, data: CuImage, new_size_wh: Tuple[int, int], device:
             arr = cp.array(data)
             return c_skimage.transform.resize(arr, output_shape=(h, w), order=3, anti_aliasing=True)
 
-    def __init__(self, fname: str, device_id: Optional[int]):
-        super().__init__(fname, device_id)
+    def __init__(self, fname: str, device_id: Optional[int], num_threads: Optional[int] = 2):
+        super().__init__(fname, device_id, num_threads)
         self._associated_images = None
         self.handle = CuImage(fname)
         # todo - this is only created for parsing the image header/metadata, as the CuCIM v24.02 does not have a
         # todo - native unified metadata interface for different vendors.
         # todo - workaround as memory spilling option
         self.dummy_handle_spill = openslide.OpenSlide(fname)
-        logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}")
+        logging.info(f"{__name__}: {fname}: Create CuImageHandle at {device_id}. {self.device}."
+                     f"Corresponding CUDA device PID: {cp.cuda.runtime.deviceGetPCIBusId(device_id)}")
 
     @LazyProperty
     def background_color(self):
@@ -119,6 +121,19 @@ def mpp_y(self):
     def comment(self):
         return self.dummy_handle_spill.properties.get("openslide.comment", "NA")
 
+    @staticmethod
+    def _resize_osh(thumb_cp: cp.ndarray, width: int, height: int) -> cp.ndarray:
+        resized_pil = Image.fromarray(thumb_cp.get()
+                                      ).convert("RGB").resize((width, height),
+                                                              resample=Image.Resampling.LANCZOS)
+        return c_skimage.util.img_as_ubyte(cp.array(resized_pil, copy=False), force_copy=False)
+
+    @staticmethod
+    def _resize_skimage(thumb_cp: cp.ndarray, width: int, height: int):
+
+        resized = c_skimage.transform.resize(thumb_cp, output_shape=(height, width), order=0)
+        return c_skimage.util.img_as_ubyte(resized)
+
     def get_thumbnail(self, new_dim) -> cp.ndarray:
         """Get thumbnail
 
@@ -134,31 +149,27 @@ def get_thumbnail(self, new_dim) -> cp.ndarray:
             level = self.get_best_level_for_downsample(downsample)
             target_w, target_h = (x // int(downsample) for x in self.dimensions)
 
+            # aspect_ratio = self.dimensions[0] / self.dimensions[1]
+            # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
+
             # resize
-            thumb = self.backend_rgba2rgb(self.region_backend((0, 0), level, self.level_dimensions[level]))
+            # thumb = self.backend_rgba2rgb(self.region_backend(location=None, level=level))
+            #
+            # try:
+            thumb = self.region_backend(level=level)
+            thumb_cp: cp.ndarray = cp.array(thumb, copy=False)
             try:
-                thumb_cp: cp.ndarray = cp.array(thumb, copy=False)
-                # todo: for reproducibility -> openslide uses LANCZOS filter in PILLOW
-                #    but the exact detail of LANCZOS resampling (e.g., kernel size) is not specified in documentation.
-                #    need to implement our own LANCZOS resampling for cupy later.
-                # aspect_ratio = self.dimensions[0] / self.dimensions[1]
-                # target_w, target_h = self.__class__.curate_to_max_dim(target_w, target_h, max(new_dim), aspect_ratio)
-                # resized = c_skimage.transform.resize(thumb_cp, output_shape=(target_h, target_w), order=1,
-                #                                      anti_aliasing=False)
-                # return c_skimage.util.img_as_ubyte(resized)
-                resized_pil = Image.fromarray(thumb_cp.get()
-                                              ).convert("RGB").resize((target_w, target_h),
-                                                                      resample=Image.Resampling.LANCZOS)
+                if CuImageHandle.USE_LANCZOS:
+                    return CuImageHandle._resize_osh(thumb_cp, target_w, target_h)
+                else:
+                    return CuImageHandle._resize_skimage(thumb_cp, target_w, target_h)
             except Exception:
                 # self.reload()
                 logging.error(f"{__name__} - {self.fname}: OOM on {self.device.device_id}."
-                                f"Use CPU"
-                                f"Error Message Dumped: {traceback.format_exc()}")
-                # thumb_np = np.array(thumb, copy=True)
-                # thumb_np = skimage.util.img_as_ubyte(thumb_np)
-                # return Image.fromarray(thumb_np).resize((target_w, target_h)).convert("RGB")
+                              f"Use CPU"
+                              f"Error Message Dumped: {traceback.format_exc()}. Try CPU method...")
                 resized_pil = self.dummy_handle_spill.get_thumbnail(new_dim).convert("RGB")
-            return cp.array(resized_pil, copy=False)
+                return cp.array(resized_pil, copy=False)
 
     def get_best_level_for_downsample(self, down_factor: float) -> int:
         """Return the largest level that's smaller than the target downsample factor, consistent with openslide.
@@ -176,11 +187,15 @@ def get_best_level_for_downsample(self, down_factor: float) -> int:
         # find the indices of the down_indices that points to the best downsample factor value
         return cast(int, down_indices[down_values.argmax()])
 
-    def region_backend(self, location, level, size, **kwargs) -> CuImage:
+    def region_backend(self, location=None, level=None, size=None, **kwargs) -> CuImage:
+        assert level is not None
         with cp.cuda.Device(self.device.device_id):
-            return self.handle.read_region(location=location, level=level, size=size,
-                                           num_workers=max(1, os.cpu_count() // 2),
-                                           **kwargs)
+            if location is not None and size is not None:
+                return self.handle.read_region(location=location, level=level, size=size,
+                                               num_workers=self._num_threads,
+                                               **kwargs)
+            assert location is None and size is None
+            return self.handle.read_region(level=level, num_workers=self._num_threads, **kwargs)
 
     @staticmethod
     def backend_to_array(region: Union[CuImage, cp.ndarray], device: Optional[Device]) -> cp.ndarray:
diff --git a/histoqc/wsi_handles/openslide_handle.py b/histoqc/wsi_handles/openslide_handle.py
index 3225583..435d53e 100644
--- a/histoqc/wsi_handles/openslide_handle.py
+++ b/histoqc/wsi_handles/openslide_handle.py
@@ -29,8 +29,8 @@ def sanitize_device(cls, device: Optional[Device]):
     def backend_rgba2rgb(self, img) -> PILImage:
         return rgba2rgb_pil(img, self.background_color)
 
-    def __init__(self, fname: str, device_id: Optional[int] = None):
-        super().__init__(fname, device_id)
+    def __init__(self, fname: str, device_id: Optional[int] = None, num_threads: Optional[int] = 1):
+        super().__init__(fname, device_id, num_threads=num_threads)
         self.handle = openslide.OpenSlide(fname)
         self._has_bounding_box = True
         self._bounding_box = self.__get_bounding_box()
diff --git a/rmm_log.txt b/rmm_log.txt
new file mode 100644
index 0000000..46fb05f
--- /dev/null
+++ b/rmm_log.txt
@@ -0,0 +1,2 @@
+[160761][01:45:16:367101][info  ] ----- RMM LOG BEGIN [PTDS DISABLED] -----
+[160761][01:45:16:367132][error ] [A][Stream 0x1][Upstream 10000000000B][FAILURE maximum pool size exceeded]