diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..5494782 --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,43 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. + +name: Publish Docker image + +on: + push: + branches: [ "master" ] + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v4 + + - name: Log in to Docker Hub + uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: histotools/HistoQC + + - name: Build and push Docker image + uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 + with: + context: . + file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile b/Dockerfile index 0af09e7..7a82c02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,4 @@ # Dockerfile for HistoQC. -# # This Dockerfile uses two stages. In the first, the project's python dependencies are # installed. This requires a C compiler. In the second stage, the HistoQC directory and # the python environment are copied over. We do not require a C compiler in the second @@ -19,16 +18,23 @@ RUN python -m venv venv \ # We force this so there is no error even if the dll does not exist. 
&& rm -f libopenslide-0.dll -FROM python:3.8-slim +FROM rayproject/ray-ml:latest-gpu ARG DEBIAN_FRONTEND=noninteractive +USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ libopenslide0 \ libtk8.6 \ procps \ && rm -rf /var/lib/apt/lists/* + WORKDIR /opt/HistoQC COPY --from=builder /opt/HistoQC/ . -ENV PATH="/opt/HistoQC/venv/bin:$PATH" +RUN pip install . + +USER ray WORKDIR /data + +# CMD ["bash"] +CMD [ "python", "-m", "histoqc", "--help" ] \ No newline at end of file diff --git a/Readme.md b/Readme.md index e12081c..6f19e01 100644 --- a/Readme.md +++ b/Readme.md @@ -1,199 +1,214 @@ -# HistoQC ---- - -HistoQC is an open-source quality control tool for digital pathology slides - -![screenshot](https://user-images.githubusercontent.com/9681868/40330248-a39603a2-5d4c-11e8-9d16-cc13fd9e21d4.png) - -# Requirements ---- - -Tested with Python 3.7 and 3.8 -Note: the DockerFile installs Python 3.8, so if your goal is reproducibility you may want to take this into account - -Requires: - -1. openslide - -And the following additional python package: - -1. python-openslide -2. matplotlib -3. numpy -4. scipy -5. skimage -6. sklearn -7. pytest (optional) - - -You can likely install the python requirements using something like (note python 3+ requirement): - -pip3 install -r requirements.txt - -The library versions have been pegged to the current validated ones. Later versions are likely to work but may not allow for cross-site/version reproducibility (typically a bad thing in quality control). - -Openslide binaries will have to be installed separately as per individual o/s instructions - -The most basic docker image can be created with the included (7-line) Dockerfile. 
- -# Basic Usage ---- - -Running the pipeline is now done via a python module: - -``` -C:\Research\code\HistoQC>python -m histoqc --help -usage: __main__.py [-h] [-o OUTDIR] [-p BASEPATH] [-c CONFIG] [-f] [-b BATCH] - [-n NPROCESSES] [--symlink TARGET_DIR] - input_pattern [input_pattern ...] - -positional arguments: - input_pattern input filename pattern (try: *.svs or - target_path/*.svs ), or tsv file containing list of - files to analyze - -optional arguments: - -h, --help show this help message and exit - -o OUTDIR, --outdir OUTDIR - outputdir, default ./histoqc_output_YYMMDD-hhmmss - -p BASEPATH, --basepath BASEPATH - base path to add to file names, helps when producing - data using existing output file as input - -c CONFIG, --config CONFIG - config file to use - -f, --force force overwriting of existing files - -b BATCH, --batch BATCH - break results file into subsets of this size - -n NPROCESSES, --nprocesses NPROCESSES - number of processes to launch - --symlink TARGET_DIR create symlink to outdir in TARGET_DIR - -``` - -HistoQC now has a httpd server which allows for improved result viewing, it can be accessed like so: - -``` -C:\Research\code\HistoQC>python -m histoqc.ui --help -usage: __main__.py [-h] [--bind ADDRESS] [--port PORT] [--deploy OUT_DIR] - [data_directory] - -positional arguments: - data_directory Specify the data directory [default:current directory] - -optional arguments: - -h, --help show this help message and exit - --bind ADDRESS, -b ADDRESS - Specify alternate bind address [default: all - interfaces] - --port PORT Specify alternate port [default: 8000] - --deploy OUT_DIR Write UI to OUT_DIR - -``` - -Lastly, supplied configuration files can be viewed and modified like so: - -``` - -C:\Research\code\HistoQC>python -m histoqc.config --help -usage: __main__.py [-h] [--list] [--show NAME] - -show example config - -optional arguments: - -h, --help show this help message and exit - --list list available configs - --show NAME show named 
example config - - -``` - - -If you would like, you can install HistoQC into your system by using - -``` -git clone https://github.com/choosehappy/HistoQC.git -cd HistoQC -python -m pip install --upgrade pip # (optional) upgrade pip to newest version -pip install -r requirements.txt # install pinned versions of packages -pip install . -``` - -Installed or simply git-cloned, a typical command line for running the tool thus looks like: - -``` -python -m histoqc -c v2.1 -n 3 "*.svs" -``` - -which will use 3 process to operate on all svs files using the named configuration file config_v2.1.ini from the config directory. - -Alternatively one can specify their own modified config file using an absolute or relative filename: - -``` -python -m histoqc.config --show light > mylight.ini -python -m histoqc -c ./mylight.ini -n 3 "*.svs" -``` - - -Afterward completion of slide processing you can view the results in your web-browser simply by following the directions after typing: - -``` -python -m histoqc.ui -``` - -Which will likely say something like: -``` -HistoQC data directory: 'D:\temp\HistoQC' -Serving HistoQC UI on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ... -``` - -Allowing you to browse to http://localhost:8000/ to select your results.tsv file. - -In case of errors, HistoQC can be run with the same output directory and will begin where it left off, identifying completed images by the presence of an existing directory. - -This can also be done remotely, but is a bit more complex, see advanced usage. - -# Configuration modifications ---- - -HistoQC's performance is significantly improved if you select an appropriate configuration file as a starting point and modify it to suit your specific use case. 
- -If you would like to see a list of provided config files to start you off, you can type - -``` -python -m histoqc.config --list -``` - -and then you can select one and write it to file like so for your modification and tuning: - -``` -python -m histoqc.config --show ihc > myconfig_ihc.ini -```` - - - -# Advanced Usage ---- - - -See [wiki](https://github.com/choosehappy/HistoQC/wiki) - - -# Notes - -Information from HistoQC users appears below: - -1. the new Pannoramic 1000 scanner, objective-magnification is given as 20, when a 20x objective lense and a 2x aperture boost is used, i.e. image magnification is actually 40x. While their own CaseViewer somehow determines that a boost exists and ends up with 40x when objective-magnification in Slidedat.ini is at 20, openslide and bioformats give 20x. - -1.1. When converted to svs by CaseViewer, the MPP entry in ImageDescription meta-parameter give the average of the x and y mpp. Both values are slightly different for the new P1000 and can be found in meta-parameters of svs as tiff.XResolution and YResolution (inverse values, so have to be converted, also respecting ResolutionUnit as centimeter or inch - -# Citation ---- -If you find this software useful, please drop me a line and/or consider citing it: - -"HistoQC: An Open-Source Quality Control Tool for Digital Pathology Slides", Janowczyk A., Zuo R., Gilmore H., Feldman M., Madabhushi A., JCO Clinical Cancer Informatics, 2019 - -Manuscript available [here](http://www.andrewjanowczyk.com/histoqc-an-open-source-quality-control-tool-for-digital-pathology-slides/) - -“Assessment of a computerized quantitative quality control tool for kidney whole slide image biopsies”, Chen Y., Zee J., Smith A., Jayapandian C., Hodgin J., Howell D., Palmer M., Thomas D., Cassol C., Farris A., Perkinson K., Madabhushi A., Barisoni L., Janowczyk A., Journal of Pathology, 2020 - -Manuscript available [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8392148/) +# HistoQC + + +HistoQC is 
an open-source quality control tool for digital pathology slides + +![screenshot](https://user-images.githubusercontent.com/9681868/40330248-a39603a2-5d4c-11e8-9d16-cc13fd9e21d4.png) + +# Requirements + +Tested with Python 3.7 and 3.8 +Note: the DockerFile installs Python 3.8, so if your goal is reproducibility you may want to take this into account + +Requires: + +1. openslide + +And the following additional python packages: + +1. python-openslide +2. matplotlib +3. numpy +4. scipy +5. skimage +6. sklearn +7. pytest (optional) + + +You can likely install the python requirements using something like (note python 3+ requirement): + +pip3 install -r requirements.txt + +The library versions have been pegged to the current validated ones. Later versions are likely to work but may not allow for cross-site/version reproducibility (typically a bad thing in quality control). + +Openslide binaries will have to be installed separately as per individual o/s instructions + +The most basic docker image can be created with the included (7-line) Dockerfile. + + +# Installation + +You can install HistoQC into your system by using + +```bash +git clone https://github.com/choosehappy/HistoQC.git +cd HistoQC +python -m pip install --upgrade pip # (optional) upgrade pip to newest version +pip install -r requirements.txt # (required) install pinned versions of packages +pip install . # (recommended) install HistoQC as a package +``` +Note that `pip install .` will install HistoQC as a python package in your environment. If you do not want to install HistoQC as a package, you will only be able to run HistoQC from the `HistoQC` directory. + +# Basic Usage + +## histoqc CLI + +Running the pipeline is now done via a python module: + +``` +C:\Research\code\HistoQC>python -m histoqc --help +usage: __main__.py [-h] [-o OUTDIR] [-p BASEPATH] [-c CONFIG] [-f] [-b BATCH] + [-n NPROCESSES] [--symlink TARGET_DIR] + input_pattern [input_pattern ...]
+ +positional arguments: + input_pattern input filename pattern (try: *.svs or + target_path/*.svs ), or tsv file containing list of + files to analyze + +optional arguments: + -h, --help show this help message and exit + -o OUTDIR, --outdir OUTDIR + outputdir, default ./histoqc_output_YYMMDD-hhmmss + -p BASEPATH, --basepath BASEPATH + base path to add to file names, helps when producing + data using existing output file as input + -c CONFIG, --config CONFIG + config file to use + -f, --force force overwriting of existing files + -b BATCH, --batch BATCH + break results file into subsets of this size + -n NPROCESSES, --nprocesses NPROCESSES + number of processes to launch + --symlink TARGET_DIR create symlink to outdir in TARGET_DIR + +``` + +Installed or simply git-cloned, a typical command line for running the tool thus looks like: + +```bash +python -m histoqc -c v2.1 -n 3 "*.svs" +``` + +which will use 3 processes to operate on all svs files using the named configuration file config_v2.1.ini from the config directory. + +In case of errors, HistoQC can be run with the same output directory and will begin where it left off, identifying completed images by the presence of an existing directory.
+ +## histoqc.config CLI +Supplied configuration files can be viewed and modified like so: + +``` + +C:\Research\code\HistoQC>python -m histoqc.config --help +usage: __main__.py [-h] [--list] [--show NAME] + +show example config + +optional arguments: + -h, --help show this help message and exit + --list list available configs + --show NAME show named example config + + +``` + + + +One can also write a supplied config to a file, modify it, and then pass it using an absolute or relative filename: + +```bash +python -m histoqc.config --show light > mylight.ini +python -m histoqc -c ./mylight.ini -n 3 "*.svs" +``` + +## histoqc.ui CLI + +HistoQC now has an httpd server which allows for improved result viewing; it can be accessed like so: + +``` +C:\Research\code\HistoQC>python -m histoqc.ui --help +usage: __main__.py [-h] [--bind ADDRESS] [--port PORT] [--deploy OUT_DIR] + [data_directory] + +positional arguments: + data_directory Specify the data directory [default:current directory] + +optional arguments: + -h, --help show this help message and exit + --bind ADDRESS, -b ADDRESS + Specify alternate bind address [default: all + interfaces] + --port PORT Specify alternate port [default: 8000] + --deploy OUT_DIR Write UI to OUT_DIR + +``` + +After completion of slide processing, view results in your web-browser simply by running the following command *from within the output directory* (named in the **histoqc_output_YYMMDD-hhmmss** format by default; see the histoqc CLI `-o` option): + +```bash +cd histoqc_output_YYMMDD-hhmmss +python -m histoqc.ui +``` + +... OR set data_directory to the output directory explicitly: +```bash +python -m histoqc.ui ./histoqc_output_YYMMDD-hhmmss +``` + +Which will likely say something like: +``` +HistoQC data directory: 'D:\temp\HistoQC' +Serving HistoQC UI on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ... +``` + +You can then browse to http://localhost:8000/ to select your results.tsv file.
+ +This can also be done remotely, but is a bit more complex; see Advanced Usage. + +# Configuration modifications + + +HistoQC's performance is significantly improved if you select an appropriate configuration file as a starting point and modify it to suit your specific use case. + +If you would like to see a list of provided config files to start you off, you can type + +```bash +python -m histoqc.config --list +``` + +and then you can select one and write it to file like so for your modification and tuning: + +```bash +python -m histoqc.config --show ihc > myconfig_ihc.ini +``` + + + +# Advanced Usage + + + +See [wiki](https://github.com/choosehappy/HistoQC/wiki) + + +# Notes + +Information from HistoQC users appears below: + +1. For the new Pannoramic 1000 scanner, objective-magnification is given as 20 when a 20x objective lens and a 2x aperture boost are used, i.e. the image magnification is actually 40x. While their own CaseViewer somehow determines that a boost exists and ends up with 40x when objective-magnification in Slidedat.ini is at 20, openslide and bioformats give 20x. + +1.1. When converted to svs by CaseViewer, the MPP entry in ImageDescription meta-parameter gives the average of the x and y mpp.
Both values are slightly different for the new P1000 and can be found in meta-parameters of svs as tiff.XResolution and YResolution inverse values, so have to be converted, also respecting ResolutionUnit as centimeter or inch + + + +# Citation + +If you find this software useful, please drop me a line and/or consider citing it: + +"HistoQC: An Open-Source Quality Control Tool for Digital Pathology Slides", Janowczyk A., Zuo R., Gilmore H., Feldman M., Madabhushi A., JCO Clinical Cancer Informatics, 2019 + +Manuscript available [here](http://www.andrewjanowczyk.com/histoqc-an-open-source-quality-control-tool-for-digital-pathology-slides/) + +“Assessment of a computerized quantitative quality control tool for kidney whole slide image biopsies”, Chen Y., Zee J., Smith A., Jayapandian C., Hodgin J., Howell D., Palmer M., Thomas D., Cassol C., Farris A., Perkinson K., Madabhushi A., Barisoni L., Janowczyk A., Journal of Pathology, 2020 + +Manuscript available [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8392148/) diff --git a/histoqc/BaseImage.py b/histoqc/BaseImage.py index 6e656e1..947f7c7 100644 --- a/histoqc/BaseImage.py +++ b/histoqc/BaseImage.py @@ -4,6 +4,7 @@ import zlib, dill from distutils.util import strtobool from PIL import Image +import re from typing import Union, Tuple #os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically from histoqc.import_wrapper.openslide import openslide @@ -45,7 +46,7 @@ def __init__(self, fname, fname_outdir, params): if not self["base_mag"]: logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?") - raise ValueError(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! 
Did you try getMag?") + return -1 self.addToPrintList("base_mag", self["base_mag"]) @@ -109,7 +110,24 @@ def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool else: return (osh.get_best_level_for_downsample(downsample_factor), False) + @staticmethod + def is_valid_size(size: str): + size = str(size) + return _PATTERN_MAG.fullmatch(size) is not None + + @staticmethod + def validate_slide_size(size: str, assertion: bool = False): + size = str(size) + if assertion: + assert BaseImage.is_valid_size(size), f"{size}: does not match pattern {_REGEX_MAG}" + # for now just cast it to str + return size + def getImgThumb(self, size: str): + # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't + # really handle it that way, and trace of previous coding also suggest that there actually lack a params + # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here. + size = BaseImage.validate_slide_size(size, assertion=False) # get img key with size key = "img_" + str(size) # return the img if it exists @@ -124,8 +142,17 @@ def getImgThumb(self, size: str): (bx, by, bwidth, bheight) = self["img_bbox"] img_base_size = (bwidth, bheight) + # barricade the invalid input first + # can't determine operation. 
+ if not BaseImage.is_valid_size(size): + # print out error message + err_msg = f"{self['filename']}: invalid arguments - {size}" + logging.error(err_msg) + self["warnings"].append(err_msg) + return + # specifies a desired operating magnification - if size.endswith(("X","x")) and size[:-1].replace(".", "0", 1).isdigit(): + if size.endswith(("X", "x")) and size[:-1].replace(".", "0", 1).isdigit(): target_mag = float(size.upper().split("X")[0]) # magnification base_mag = self["base_mag"] @@ -176,15 +203,6 @@ def getImgThumb(self, size: str): target_dims = getDimensionsByOneDim(self, int(size)) target_sampling_factor = img_base_size[0] / target_dims[0] self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) - - # can't determine operation. - else: - # print out error message - err_msg = f"{self['filename']}: invalid arguments - {size}" - logging.error(err_msg) - self["warnings"].append(err_msg) - return - return self[key] def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float): @@ -250,12 +268,14 @@ def resizeTileDownward(self, target_downsampling_factor, level): output = np.concatenate(output, axis=1) return output + def rgba2rgb(s: BaseImage, img): bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff") thumb = Image.new("RGB", img.size, bg_color) thumb.paste(img, None, img) return thumb + def printMaskHelper(type: str, prev_mask, curr_mask): if type == "relative2mask": if len(prev_mask.nonzero()[0]) == 0: @@ -270,30 +290,53 @@ def printMaskHelper(type: str, prev_mask, curr_mask): return str(-1) +def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]: + """Parse magnification to float + Args: + mag: + + Returns: + Validated size factor either as a float number or "NA" (MAG_NA) + """ + if isinstance(mag, (int, float)): + return float(mag) + numeric_mag_str_flag = BaseImage.is_valid_size(mag) + invalid_flag = mag == MAG_NA or not numeric_mag_str_flag 
+ if invalid_flag: + return MAG_NA + # regex determines X must either be abscent or at the end of the string + if "X" in mag.upper(): + mag = mag[0:-1] + return float(mag) + + # this function is seperated out because in the future we hope to have automatic detection of # magnification if not present in open slide, and/or to confirm openslide base magnification def getMag(s: BaseImage, params) -> Union[float, None]: logging.info(f"{s['filename']} - \tgetMag") osh = s["os_handle"] mag = osh.properties.get("openslide.objective-power") or \ - osh.properties.get("aperio.AppMag") or None + osh.properties.get("aperio.AppMag") or MAG_NA # if mag or strtobool(params.get("confirm_base_mag", "False")): # # do analysis work here # logging.warning(f"{s['filename']} - Unknown base magnification for file") # s["warnings"].append(f"{s['filename']} - Unknown base magnification for file") # return None # else: - if not mag: - return None + # workaround for unspecified mag -- with or without automatic detection it might be preferred to have + # mag predefined + mag = mag or parsed_mag(params.get("base_mag")) + # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise + # the exception here. 
+ return float(mag) if mag is not MAG_NA else MAG_NA - return float(mag) def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]: (x, y, width, height) = s["img_bbox"] # calulate the width or height depends on dim if width > height: h = int(dim * height / width) - return (dim, h) + return dim, h else: w = int(dim * width / height) - return (w, dim) + return w, dim diff --git a/histoqc/ClassificationModule.py b/histoqc/ClassificationModule.py index 68f6851..3771851 100644 --- a/histoqc/ClassificationModule.py +++ b/histoqc/ClassificationModule.py @@ -191,7 +191,7 @@ def byExampleWithFeatures(s, params): # convert grayscale images into binary images if images are not binary format if mask.dtype.kind != 'b': # warning log - msg = f"Mask file '{ex[1]}' is not a binary image" + msg = f"Mask file '{ex[1]}' is not a binary image. Automatically converting to binary..." logging.warning(s['filename'] + ' - ' + msg) s["warnings"].append(msg) # convert to binary diff --git a/histoqc/DeconvolutionModule.py b/histoqc/DeconvolutionModule.py index 25237dd..dbb41f1 100644 --- a/histoqc/DeconvolutionModule.py +++ b/histoqc/DeconvolutionModule.py @@ -1,76 +1,76 @@ -import logging -import os -import sys -import numpy as np -from skimage import io, color, img_as_ubyte -from skimage.exposure import rescale_intensity -from skimage.color import separate_stains -from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb -from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \ - hpx_from_rgb # need to load all of these in case the user selects them -from distutils.util import strtobool - -import matplotlib.pyplot as plt - - -def separateStains(s, params): - logging.info(f"{s['filename']} - \tseparateStains") - stain = params.get("stain", "") - use_mask = strtobool(params.get("use_mask", "True")) - - if stain == "": - logging.error(f"{s['filename']} - stain not set in 
DeconvolutionModule.separateStains") - sys.exit(1) - return - - stain_matrix = getattr(sys.modules[__name__], stain, "") - - if stain_matrix == "": - logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") - sys.exit(1) - return - - mask = s["img_mask_use"] - - if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early - for c in range(3): - s.addToPrintList(f"deconv_c{c}_std", str(-100)) - s.addToPrintList(f"deconv_c{c}_mean", str(-100)) - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) - - logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " - f"remains detectable! Saving Black images") - s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue " - f"remains detectable! Saving Black images") - - return - - img = s.getImgThumb(s["image_work_size"]) - dimg = separate_stains(img, stain_matrix) - - for c in range(0, 3): - dc = dimg[:, :, c] - - clip_max_val = np.quantile(dc.flatten(), .99) - dc = np.clip(dc, a_min=0, a_max=clip_max_val) - - - if use_mask: - dc_sub = dc[mask] - dc_min = dc_sub.min() - dc_max = dc_sub.max() - - s.addToPrintList(f"deconv_c{c}_mean", str(dc_sub.mean())) - s.addToPrintList(f"deconv_c{c}_std", str(dc_sub.std())) - else: - mask = 1.0 - dc_min = dc.min() - dc_max = dc.max() - - s.addToPrintList(f"deconv_c{c}_mean", str(dc.mean())) - s.addToPrintList(f"deconv_c{c}_std", str(dc.std())) - - dc = (dc - dc_min) / float(dc_max - dc_min) * mask - io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc)) - - return +import logging +import os +import sys +import numpy as np +from skimage import io, color, img_as_ubyte +from skimage.exposure import rescale_intensity +from skimage.color import separate_stains +from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb +from 
skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \ + hpx_from_rgb # need to load all of these in case the user selects them +from distutils.util import strtobool + +import matplotlib.pyplot as plt + + +def separateStains(s, params): + logging.info(f"{s['filename']} - \tseparateStains") + stain = params.get("stain", "") + use_mask = strtobool(params.get("use_mask", "True")) + + if stain == "": + logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains") + sys.exit(1) + return + + stain_matrix = getattr(sys.modules[__name__], stain, None) + + if stain_matrix is None: + logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") + sys.exit(1) + return + + mask = s["img_mask_use"] + + if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early + for c in range(3): + s.addToPrintList(f"deconv_c{c}_std", str(-100)) + s.addToPrintList(f"deconv_c{c}_mean", str(-100)) + io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) + + logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " + f"remains detectable! Saving Black images") + s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue " + f"remains detectable! 
Saving Black images") + + return + + img = s.getImgThumb(s["image_work_size"]) + dimg = separate_stains(img, stain_matrix) + + for c in range(0, 3): + dc = dimg[:, :, c] + + clip_max_val = np.quantile(dc.flatten(), .99) + dc = np.clip(dc, a_min=0, a_max=clip_max_val) + + + if use_mask: + dc_sub = dc[mask] + dc_min = dc_sub.min() + dc_max = dc_sub.max() + + s.addToPrintList(f"deconv_c{c}_mean", str(dc_sub.mean())) + s.addToPrintList(f"deconv_c{c}_std", str(dc_sub.std())) + else: + mask = 1.0 + dc_min = dc.min() + dc_max = dc.max() + + s.addToPrintList(f"deconv_c{c}_mean", str(dc.mean())) + s.addToPrintList(f"deconv_c{c}_std", str(dc.std())) + + dc = (dc - dc_min) / float(dc_max - dc_min) * mask + io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc)) + + return diff --git a/histoqc/LocalTextureEstimationModule.py b/histoqc/LocalTextureEstimationModule.py index 6b970e3..8cd99e3 100644 --- a/histoqc/LocalTextureEstimationModule.py +++ b/histoqc/LocalTextureEstimationModule.py @@ -1,47 +1,53 @@ -import logging -import numpy as np -from skimage import color -from distutils.util import strtobool -from skimage.feature import greycomatrix, greycoprops -import matplotlib.pyplot as plt - - - -def estimateGreyComatrixFeatures(s, params): - prefix = params.get("prefix", None) - prefix = prefix+"_" if prefix else "" - - logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}") - patch_size = int(params.get("patch_size", 32)) - npatches = int(params.get("npatches", 100)) - nlevels = int(params.get("nlevels", 8)) - feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') - invert = strtobool(params.get("invert", "False")) - mask_name = params.get("mask_name","img_mask_use") - - - img = s.getImgThumb(s["image_work_size"]) - img = color.rgb2gray(img) - - mask = s[mask_name] if not invert else ~s[mask_name] - maskidx = mask.nonzero() - maskidx = 
np.asarray(maskidx).transpose() - idx = np.random.choice(maskidx.shape[0], npatches) - - results = [] - - for id in idx: - r, c = maskidx[id, :] - patch = img[r:r + patch_size, c:c + patch_size] - glcm = greycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5], - angles=[0], levels=nlevels, symmetric=True, normed=True) - - results.append([greycoprops(glcm, prop=feat) for feat in feats]) - - results = np.asarray(results).squeeze() - - for vals, feat in zip(results.transpose(), feats): - s.addToPrintList(f"{prefix}{feat}", str(vals.mean())) - s.addToPrintList(f"{prefix}{feat}_std", str(vals.std())) - - return +import logging +import numpy as np +from skimage import color +from distutils.util import strtobool +from skimage.feature import greycomatrix, greycoprops +import matplotlib.pyplot as plt + + + +def estimateGreyComatrixFeatures(s, params): + prefix = params.get("prefix", None) + prefix = prefix+"_" if prefix else "" + + logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}") + patch_size = int(params.get("patch_size", 32)) + npatches = int(params.get("npatches", 100)) + nlevels = int(params.get("nlevels", 8)) + feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') + invert = strtobool(params.get("invert", "False")) + mask_name = params.get("mask_name","img_mask_use") + + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + + mask = s[mask_name] if not invert else ~s[mask_name] + if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask + msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask" + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + return + + maskidx = mask.nonzero() + maskidx = np.asarray(maskidx).transpose() + idx = np.random.choice(maskidx.shape[0], npatches) + 
+ results = [] + + for id in idx: + r, c = maskidx[id, :] + patch = img[r:r + patch_size, c:c + patch_size] + glcm = greycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5], + angles=[0], levels=nlevels, symmetric=True, normed=True) + + results.append([greycoprops(glcm, prop=feat) for feat in feats]) + + results = np.asarray(results).squeeze() + + for vals, feat in zip(results.transpose(), feats): + s.addToPrintList(f"{prefix}{feat}", str(vals.mean())) + s.addToPrintList(f"{prefix}{feat}_std", str(vals.std())) + + return diff --git a/histoqc/SaveModule.py b/histoqc/SaveModule.py index 2828513..627f8ca 100644 --- a/histoqc/SaveModule.py +++ b/histoqc/SaveModule.py @@ -1,91 +1,91 @@ -import logging -import os -from skimage import io, img_as_ubyte -from distutils.util import strtobool -from skimage import color -import numpy as np - -import matplotlib.pyplot as plt - - -def blend2Images(img, mask): - if (img.ndim == 3): - img = color.rgb2gray(img) - if (mask.ndim == 3): - mask = color.rgb2gray(mask) - img = img[:, :, None] * 1.0 # can't use boolean - mask = mask[:, :, None] * 1.0 - out = np.concatenate((mask, img, mask), 2) - return out - - -def saveFinalMask(s, params): - logging.info(f"{s['filename']} - \tsaveUsableRegion") - - mask = s["img_mask_use"] - for mask_force in s["img_mask_force"]: - mask[s[mask_force]] = 0 - - io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask)) - - if strtobool(params.get("use_mask", "True")): # should we create and save the fusion mask? 
- img = s.getImgThumb(s["image_work_size"]) - out = blend2Images(img, mask) - io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out)) - - return - - -def saveAssociatedImage(s, key:str, dim:int): - logging.info(f"{s['filename']} - \tsave{key.capitalize()}") - osh = s["os_handle"] - - if not key in osh.associated_images: - message = f"{s['filename']}- \tsave{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images" - logging.warning(message) - s["warnings"].append(message) - return - - # get asscociated image by key - associated_img = osh.associated_images[key] - (width, height) = associated_img.size - - # calulate the width or height depends on dim - if width > height: - h = round(dim * height / width) - size = (dim, h) - else: - w = round(dim * width / height) - size = (w, dim) - - associated_img = associated_img.resize(size) - associated_img = np.asarray(associated_img)[:, :, 0:3] - io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img) - -def saveMacro(s, params): - dim = params.get("small_dim", 500) - saveAssociatedImage(s, "macro", dim) - return - -def saveMask(s, params): - logging.info(f"{s['filename']} - \tsaveMaskUse") - suffix = params.get("suffix", None) - - # check suffix param - if not suffix: - msg = f"{s['filename']} - \tPlease set the suffix for mask use." 
def blend2Images(img, mask):
    """Fuse an image and a mask into one 3-channel array.

    Inputs with three dimensions are first reduced with ``color.rgb2gray``.
    The result stacks ``(mask, img, mask)`` along the last axis, i.e. the mask
    fills channels 0 and 2 and the image fills channel 1.

    Args:
        img: 2-D or 3-D image array.
        mask: 2-D or 3-D mask array; must end up with the same 2-D shape as
            ``img`` for the concatenation to succeed.

    Returns:
        Array of shape ``(rows, cols, 3)``.
    """
    if img.ndim == 3:
        img = color.rgb2gray(img)
    if mask.ndim == 3:
        mask = color.rgb2gray(mask)
    # Multiplying by 1.0 promotes boolean/integer data to float.
    img = img[:, :, None] * 1.0
    mask = mask[:, :, None] * 1.0
    return np.concatenate((mask, img, mask), 2)


def saveFinalMask(s, params):
    """Write the final usable-region mask and, optionally, its fusion image.

    Every mask named in ``s["img_mask_force"]`` is cleared from
    ``s["img_mask_use"]`` before saving ``<filename>_mask_use.png``. Unless the
    ``use_mask`` parameter parses as false, ``<filename>_fuse.png`` (see
    ``blend2Images``) is written as well.

    Args:
        s: dict-like slide state (``filename``, ``outdir``, ``img_mask_use``,
            ``img_mask_force``, ``image_work_size``; ``getImgThumb`` is called).
        params: module options; ``use_mask`` defaults to "True".
    """
    logging.info(f"{s['filename']} - \tsaveUsableRegion")

    # NOTE(review): if s returns the stored array (not a copy), the assignment
    # below also updates s["img_mask_use"] in place - confirm this is intended.
    mask = s["img_mask_use"]
    for mask_force in s["img_mask_force"]:
        mask[s[mask_force]] = 0

    io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask))

    if strtobool(params.get("use_mask", "True")):  # should we create and save the fusion mask?
        img = s.getImgThumb(s["image_work_size"])
        out = blend2Images(img, mask)
        io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out))

    return


def saveAssociatedImage(s, key: str, dim: int):
    """Save one of the slide's associated images, resized to fit ``dim``.

    The longer edge of the image becomes ``dim`` pixels and the shorter edge is
    scaled proportionally (never below one pixel). Only the first three
    channels are written to ``<outdir>/<filename>_<key>.png``.

    Args:
        s: dict-like slide state (``filename``, ``outdir``, ``warnings`` and
            ``os_handle``, whose ``associated_images`` mapping is read).
        key: name of the associated image, e.g. ``"macro"``.
        dim: target length of the longer edge; anything ``int()`` accepts.

    Returns:
        None. If ``key`` is missing a warning is logged, appended to
        ``s["warnings"]`` and nothing is written.
    """
    logging.info(f"{s['filename']} - \tsave{key.capitalize()}")
    osh = s["os_handle"]

    if key not in osh.associated_images:
        message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images"
        logging.warning(message)
        s["warnings"].append(message)
        return

    # Parameters read from configuration may be strings; arithmetic below needs a number.
    dim = int(dim)

    # get the associated image by key
    associated_img = osh.associated_images[key]
    (width, height) = associated_img.size

    # Scale the shorter edge proportionally; clamp to 1 so that extreme aspect
    # ratios never request a zero-sized image.
    if width > height:
        size = (dim, max(1, round(dim * height / width)))
    else:
        size = (max(1, round(dim * width / height)), dim)

    associated_img = associated_img.resize(size)
    # keep the first three channels only (presumably drops an alpha channel - TODO confirm)
    associated_img = np.asarray(associated_img)[:, :, 0:3]
    io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img)


def saveMacro(s, params):
    """Save the slide's ``macro`` associated image.

    Args:
        s: dict-like slide state, forwarded to ``saveAssociatedImage``.
        params: module options; ``small_dim`` (default 500) is the target
            length of the longer edge and is converted with ``int``.
    """
    dim = int(params.get("small_dim", 500))
    saveAssociatedImage(s, "macro", dim)
    return


def saveMask(s, params):
    """Save ``s["img_mask_use"]`` as ``<outdir>/<filename>_<suffix>.png``.

    Args:
        s: dict-like slide state (``filename``, ``outdir``, ``img_mask_use``).
        params: module options; ``suffix`` is required.

    Returns:
        None. Without a ``suffix`` an error is logged and nothing is written.
    """
    logging.info(f"{s['filename']} - \tsaveMaskUse")
    suffix = params.get("suffix", None)

    # check suffix param
    if not suffix:
        msg = f"{s['filename']} - \tPlease set the suffix for mask use."
        logging.error(msg)
        return

    # save mask
    io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"]))


def saveThumbnails(s, params):
    """Write two thumbnails of the slide: ``_thumb.png`` and ``_thumb_small.png``.

    Args:
        s: dict-like slide state (``filename``, ``outdir``; ``getImgThumb`` is
            called once per thumbnail).
        params: module options; ``image_work_size`` (default "1.25x") selects
            the larger thumbnail and ``small_dim`` (default 500) the smaller.
    """
    logging.info(f"{s['filename']} - \tsaveThumbnail")
    # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one
    img = s.getImgThumb(params.get("image_work_size", "1.25x"))
    io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img)

    img = s.getImgThumb(params.get("small_dim", 500))
    io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img)
    return
-[options.extras_require] -tests = - pytest - pytest-cov - requests - -[options.entry_points] -console_scripts = - histoqc_pipeline = histoqc.__main__:main - histoqc_ui = histoqc.ui.__main__:main +[metadata] +name = histoqc +url = https://github.com/choosehappy/HistoQC +download_url = https://github.com/choosehappy/HistoQC +license = BSD +license_file = LICENSE.txt +description = HistoQC is an open-source quality control tool for digital pathology slides +long_description = file: Readme.md +long_description_content_type = text/markdown +author = Andrew Janowczyk +classifiers = + Intended Audience :: Science/Research + License :: OSI Approved :: BSD License + Programming Language :: Python + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Topic :: Scientific/Engineering + Topic :: Scientific/Engineering :: Visualization + Topic :: Scientific/Engineering :: Information Analysis + Topic :: Scientific/Engineering :: Bio-Informatics + Topic :: Utilities + Operating System :: Microsoft :: Windows + Operating System :: POSIX + Operating System :: Unix + Operating System :: MacOS + + +[options] +packages = + histoqc + histoqc.config + histoqc.data + histoqc.ui + histoqc.import_wrapper +python_requires = >=3.7 +install_requires = + importlib_resources; python_version < '3.9' + matplotlib + numpy + scikit-image + scikit-learn + scipy + openslide-python + dill + +[options.extras_require] +tests = + pytest + pytest-cov + requests + +[options.entry_points] +console_scripts = + histoqc_pipeline = histoqc.__main__:main + histoqc_ui = histoqc.ui.__main__:main