Skip to content

Commit

Permalink
Merge pull request #266 from nanli-emory/unittest
Browse files Browse the repository at this point in the history
Unit tests for verifying reproducibility of output
  • Loading branch information
jacksonjacobs1 authored May 7, 2024
2 parents 614d2f0 + 37ea0f3 commit 69ae112
Show file tree
Hide file tree
Showing 33 changed files with 3,023 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ optional arguments:
-f, --force force overwriting of existing files
-b BATCH, --batch BATCH
break results file into subsets of this size
-s SEED, --seed SEED
set a seed used to produce a random number in all modules
-n NPROCESSES, --nprocesses NPROCESSES
number of processes to launch
--symlink TARGET_DIR create symlink to outdir in TARGET_DIR
Expand Down
3 changes: 2 additions & 1 deletion histoqc/BaseImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

class BaseImage(dict):

def __init__(self, fname, fname_outdir, params):
def __init__(self, fname, fname_outdir, seed, params):
dict.__init__(self)

self.in_memory_compression = strtobool(params.get("in_memory_compression", "False"))
Expand All @@ -31,6 +31,7 @@ def __init__(self, fname, fname_outdir, params):
self.addToPrintList("comments", " ")

self["outdir"] = fname_outdir
self["seed"] = seed
self["dir"] = os.path.dirname(fname)

self["os_handle"] = openslide.OpenSlide(fname)
Expand Down
5 changes: 5 additions & 0 deletions histoqc/ClassificationModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ def byExampleWithFeatures(s, params):

if nsamples_per_example != -1: # subsampling required
nitems = nsamples_per_example if nsamples_per_example > 1 else int(mask.shape[0]*nsamples_per_example)

# Seed NumPy's global RNG when a seed was supplied, so the random subsampling below is reproducible
if s["seed"] is not None:
np.random.seed(int(s["seed"]))

idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
eximg = eximg[idxkeep, :]
mask = mask[idxkeep]
Expand Down
4 changes: 4 additions & 0 deletions histoqc/LocalTextureEstimationModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


def estimateGreyComatrixFeatures(s, params):

prefix = params.get("prefix", None)
prefix = prefix+"_" if prefix else ""

Expand All @@ -19,6 +20,9 @@ def estimateGreyComatrixFeatures(s, params):
invert = strtobool(params.get("invert", "False"))
mask_name = params.get("mask_name","img_mask_use")

# Seed NumPy's global RNG when a seed was supplied
if s["seed"] is not None:
np.random.seed(int(s["seed"]))

img = s.getImgThumb(s["image_work_size"])
img = color.rgb2gray(img)
Expand Down
5 changes: 5 additions & 0 deletions histoqc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def main(argv=None):
help="break results file into subsets of this size",
type=int,
default=None)
parser.add_argument('-s', '--seed',
help="set a seed used to produce a random number in all modules",
type=int,
default=None)
parser.add_argument('-n', '--nprocesses',
help="number of processes to launch",
type=int,
Expand Down Expand Up @@ -158,6 +162,7 @@ def main(argv=None):
'shared_dict': mpm.dict(),
'num_files': num_files,
'force': args.force,
'seed': args.seed
}
failed = mpm.list()
setup_plotting_backend(lm.logger)
Expand Down
10 changes: 6 additions & 4 deletions histoqc/_worker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""histoqc worker functions"""
import os
import shutil

import numpy as np
from histoqc.BaseImage import BaseImage
from histoqc._pipeline import load_pipeline
from histoqc._pipeline import setup_plotting_backend
Expand All @@ -16,9 +16,11 @@ def worker_setup(c):


def worker(idx, file_name, *,
process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force):
process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force, seed):
"""pipeline worker function"""

# Seed NumPy's global RNG when a seed was supplied
if seed is not None:
np.random.seed(seed)
# --- output directory preparation --------------------------------
fname_outdir = os.path.join(outdir, os.path.basename(file_name))
if os.path.isdir(fname_outdir): # directory exists
Expand All @@ -37,7 +39,7 @@ def worker(idx, file_name, *,
log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")

try:
s = BaseImage(file_name, fname_outdir, dict(config.items("BaseImage.BaseImage")))
s = BaseImage(file_name, fname_outdir, seed, dict(config.items("BaseImage.BaseImage")))

for process, process_params in process_queue:
process_params["lock"] = lock
Expand Down
1 change: 1 addition & 0 deletions histoqc/config/config_v2.1.ini
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ npatches: 1000
feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation
invert: False
mask_name: img_mask_use

[LightDarkModule.minimumPixelIntensityNeighborhoodFiltering]
disk_size: 5
upper_threshold: 210
Expand Down
Loading

0 comments on commit 69ae112

Please sign in to comment.