rendeirolab · afrendeiro · Aug 16, 2024 · Mar 15, 2024 · Mar 15, 2024 · Mar 15, 2024
diff --git a/.github/workflows/pytest_workflow.yml b/.github/workflows/pytest_workflow.yml
@@ -0,0 +1,27 @@
+name: Pytest testing
+
+# Run the test suite on every push.
+on: [push]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+    # NOTE(review): presumably the system library backing openslide-python - confirm.
+    - name: Install system dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y openslide-tools
+
+    - name: Install Python dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    # Coverage is measured on the wsi package, matching the Makefile "test" target.
+    - name: Test with pytest
+      run: |
+        pip install pytest pytest-cov
+        pytest wsi --doctest-modules --junitxml=junit/test-results.xml --cov=wsi --cov-report=xml --cov-report=html
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,13 @@
 __pycache__/
 *.egg-info
 build/
+dist
+.coverage
+cache
+junit
+joblib
+__pycache__
+.mypy_cache
+coverage.xml
+_version.py
+*.sublime-*
diff --git a/Makefile b/Makefile
@@ -0,0 +1,24 @@
+# Remove build, packaging, cache and test/coverage artefacts.
+# The leading "-" makes make carry on when a command fails; -f keeps rm quiet
+# when the target does not exist.
+.PHONY: clean
+clean:
+	-rm -rf build
+	-rm -rf dist
+	-rm -rf *.egg-info
+	-rm -rf .coverage
+	-rm -rf cache
+	-rm -rf junit
+	-rm -rf joblib
+	-rm -rf __pycache__
+	-rm -rf .mypy_cache
+	-rm -rf htmlcov
+	-rm -f coverage.xml
+	# -rm -rf .pytest_cache
+
+# Run pytest on the wsi package (doctests included) after cleaning, writing a
+# JUnit XML report plus XML and HTML coverage reports.
+test: clean
+	pytest wsi \
+		--doctest-modules \
+		--junitxml=junit/test-results.xml \
+		--cov=wsi \
+		--cov-report=xml \
+		--cov-report=html
+
+# Clean, then install the package in editable mode.
+install: clean
+	python -m pip install -e .
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-CLAM
+WSI
 ====
 This is a fork of the repository from [Mahmood lab's CLAM repository](https://github.com/mahmoodlab/CLAM).
 It is made available under the GPLv3 License and is available for non-commercial academic purposes.
@@ -8,7 +8,7 @@ It is made available under the GPLv3 License and is available for non-commercial
 
 The purpose of the fork is to compartimentalize the features related with processing of whole-slide images (WSI) from the CLAM model.
 
-The package has been renamed to `wsi_core` as that was the name of the module related with whole slide image processing.
+The package has been renamed to `wsi`.
 
 
 ## Installation
@@ -17,24 +17,40 @@ While the repository is private, make sure you [exchange SSH keys of the machine
 
 Then simply install with `pip`:
 ```bash
-git clone [email protected]:rendeirolab/CLAM.git
-cd CLAM
+# pip install git+ssh://git@github.com/rendeirolab/wsi.git
+git clone git@github.com:rendeirolab/wsi.git
+cd wsi
 pip install .
 ```
 
 Note that the package uses setuptols-scm for version control and therefore the installation source needs to be a git repository (a zip file of source code won't work).
 
 ## Usage
 
+The only exposed class is `WholeSlideImage`, which provides all the functionality of the package.
+
+### Quick start - segmentation, tiling and feature extraction
+```python
+from wsi import WholeSlideImage    
+
+url = "https://brd.nci.nih.gov/brd/imagedownload/GTEX-O5YU-1426"
+slide = WholeSlideImage(url)
+slide.segment()
+slide.tile()
+feats, coords = slide.inference("resnet18")
+```
+
+### Full example
+
 This package is meant for both interactive use and for use in a pipeline at scale.
 By default actions do not return anything, but instead save the results to disk in files relative to the slide file.
 
 All major functions have sensible defaults but allow for customization.
 Please check the docstring of each function for more information.
 
 ```python
-from wsi_core import WholeSlideImage
-from wsi_core.utils import Path
+from wsi import WholeSlideImage
+from wsi.utils import Path
 
 # Get example slide image
 slide_file = Path("GTEX-12ZZW-2726.svs")
@@ -48,7 +64,7 @@ if not slide_file.exists():
 # Instantiate slide object
 slide = WholeSlideImage(slide_file)
 
-# Instantiate slide object
+# Instantiation can be done with custom attributes
 slide = WholeSlideImage(slide_file, attributes=dict(donor="GTEX-12ZZW"))
 
 # Segment tissue (segmentation mask is stored as polygons in slide.contours_tissue)
@@ -75,15 +91,28 @@ for img in images:
 slide.save_tile_images(output_dir=slide_file.parent / (slide_file.stem + "_tiles"))
 
 # Use in a torch dataloader
-loader = slide.as_data_loader()
+loader = slide.as_data_loader(with_coords=True)
 
-# Extract features
+# Extract features "manually"
 import torch
 from tqdm import tqdm
-model = torch.hub.load("pytorch/vision", "resnet50", pretrained=True) 
-for count, (batch, coords) in tqdm(enumerate(loader), total=len(loader)):
+import numpy as np
+model = torch.hub.load("pytorch/vision", "resnet18", weights="DEFAULT")
+feats = list()
+coords = list()
+for count, (batch, yx) in tqdm(enumerate(loader), total=len(loader)):
     with torch.no_grad(): 
-        features = model(batch).numpy()
+        f = model(batch).numpy()
+    feats.append(f)
+    coords.append(yx)
+
+feats = np.concatenate(feats, axis=0)
+coords = np.concatenate(coords, axis=0)
+
+# Extract features "automatically"
+feats, coords = slide.inference('resnet18')
+
+# Additional parameters can also be specified
+feats, coords = slide.inference('resnet18', device='cuda', data_loader_kws=dict(batch_size=512))
 ```
 
 ## Reference

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 # PIP, using PEP621
 [project]
-name = "wsi_core"
+name = "wsi"
 authors = [
     {name = "Andre Rendeiro", email = "[email protected]"},
 ]
@@ -11,8 +11,8 @@ keywords = [
 ]
 classifiers = [
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.7",
-    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
     "Development Status :: 3 - Alpha",
     "Typing :: Typed",
     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
@@ -21,14 +21,21 @@ classifiers = [
 #license = "gpt3"
 requires-python = ">=3.10"
 dependencies = [
-    "opencv-python",
     "h5py",
     "matplotlib",
     "numpy",
+    "opencv-python",
     "openslide-python",
+    "pandas",
     "Pillow",
+    "requests",
+    "scikit-image",
+    "scikit-learn",
+    "scipy",
+    "shapely",
     "torch",
     "torchvision",
+    "tqdm",
 ]
 dynamic = ['version']
 
@@ -51,9 +58,9 @@ doc = [
 ]
 
 [project.urls]
-homepage = "https://github.com/rendeirolab/CLAM"
-documentation = "https://github.com/rendeirolab/CLAM/blob/main/README.md"
-repository = "https://github.com/rendeirolab/CLAM"
+homepage = "https://github.com/rendeirolab/wsi"
+documentation = "https://github.com/rendeirolab/wsi/blob/main/README.md"
+repository = "https://github.com/rendeirolab/wsi"
 
 [build-system]
 # requires = ["poetry>=0.12", "setuptools>=45", "wheel", "poetry-dynamic-versioning"]
@@ -62,7 +69,7 @@ requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools_scm]
-write_to = "wsi_core/_version.py"
+write_to = "wsi/_version.py"
 write_to_template = 'version = __version__ = "{version}"'
 
 [tool.black]
@@ -104,7 +111,7 @@ module = [
     'matplotlib.*',
     'networkx.*',
     #
-    'wsi_core.*'
+    'wsi.*'
 ]
 ignore_missing_imports = true
 
@@ -117,5 +124,5 @@ testpaths = [
 ]
 markers = [
     'slow', # 'marks tests as slow (deselect with "-m 'not slow'")',
-    'serial'
-]
+    "wsi"
+]
diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,15 @@
-opencv-python
 h5py
 matplotlib
 numpy
+opencv-python
 openslide-python
+pandas
 Pillow
+requests
+scikit-image
+scikit-learn
+scipy
+shapely
 torch
 torchvision
+tqdm
diff --git a/wsi/__init__.py b/wsi/__init__.py
@@ -0,0 +1,2 @@
+from .wsi import WholeSlideImage
+from ._version import version, __version__
diff --git a/wsi/tests/test_wsi.py b/wsi/tests/test_wsi.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+import tempfile
+import joblib
+
+import requests
+import pytest
+from wsi import WholeSlideImage
+import numpy as np
+
+
+mem = joblib.Memory("cache", verbose=0)
+
+
+@pytest.fixture(scope="session")
+@mem.cache
+def get_test_slide() -> Path:
+    """Return the path to the GTEX-O5YU-1426 test slide, downloading it if needed.
+
+    If ``GTEX-O5YU-1426.svs`` exists in the working directory it is returned and
+    every other file whose name starts with the slide's stem is deleted.
+    Otherwise the slide is downloaded into a temporary ``.svs`` file that is
+    kept on disk.
+
+    The result is memoized by joblib through ``mem``.
+    NOTE(review): a memoized temporary path may no longer exist in a later
+    session - confirm whether caching is wanted here.
+
+    Raises:
+        requests.HTTPError: if the download responds with an error status.
+    """
+    slide_file = Path("GTEX-O5YU-1426.svs")
+    if slide_file.exists():
+        # Presumably outputs of earlier runs written next to the slide.
+        for f in sorted(Path().glob(slide_file.stem + "*")):
+            if f != slide_file:
+                f.unlink()
+        return slide_file
+
+    url = f"https://brd.nci.nih.gov/brd/imagedownload/{slide_file.stem}"
+    with requests.get(url, stream=True, timeout=60) as response:
+        # Fail loudly instead of storing an HTTP error page as a slide.
+        response.raise_for_status()
+        # delete=False keeps the file after closing; writing through the handle
+        # avoids re-opening a name whose temporary file was already removed.
+        with tempfile.NamedTemporaryFile(suffix=".svs", delete=False) as file:
+            for chunk in response.iter_content(chunk_size=1024 * 4):
+                file.write(chunk)
+    return Path(file.name)
+
+
+@pytest.mark.wsi
+@pytest.mark.slow
+def test_whole_slide_image_inference(get_test_slide):
+    """Segment, tile and run "resnet18" inference on the test slide end to end."""
+    slide = WholeSlideImage(get_test_slide)
+    slide.segment()
+    # contours_tissue and holes_tissue must have the same length.
+    assert len(slide.contours_tissue) == len(slide.holes_tissue)
+    slide.tile()
+    feats, coords = slide.inference("resnet18")
+
+    # Expected values are hard-coded for this slide (presumably from a reference
+    # run); the observed sum is reported in the failure message.
+    assert coords.shape == (654, 2), "Coords shape mismatch"
+    feats_sum = feats.sum()
+    assert np.allclose(feats_sum, 14.555267, atol=1e-3), f"Features sum mismatch: {feats_sum}"