From 80aae8fda100a6d6887f459964b469d271e6dfde Mon Sep 17 00:00:00 2001
From: Jacob Silterra
Date: Wed, 12 Jun 2024 07:56:02 -0400
Subject: [PATCH 1/3] Tweaks to README

* Add citation to NLST training data.
* Change levels of some headings.
---
 README.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index e14c30a..b56b82b 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ Lung Cancer Risk Prediction.
 
 Additional documentation can be found on the [GitHub Wiki](https://github.com/reginabarzilaygroup/Sybil/wiki).
 
-## Run a regression test
+# Run a regression test
 
 ```shell
 python tests/regression_test.py
@@ -15,7 +15,7 @@ python tests/regression_test.py
 ```
 
 This will download the `sybil_ensemble` model and sample data, and compare the results to previously calculated values.
 
-## Run the model
+# Run the model
 
 You can load our pretrained model trained on the NLST dataset, and score a given DICOM serie as follows:
 
@@ -38,7 +38,7 @@ Models available include: `sybil_1`, `sybil_2`, `sybil_3`, `sybil_4`, `sybil_5`
 
 All model files are available on [GitHub releases](https://github.com/reginabarzilaygroup/Sybil/releases) as well as [here](https://drive.google.com/drive/folders/1nBp05VV9mf5CfEO6W5RY4ZpcpxmPDEeR?usp=sharing).
 
-## Replicating results
+# Replicating results
 
 You can replicate the results from our model using our training script:
 
 ```sh
 python train.py
 ```
@@ -49,14 +49,14 @@ python train.py
 
 See our [documentation](docs/readme.md) for a full description of Sybil's training parameters. Additional information on the training process can be found on the [train](https://github.com/reginabarzilaygroup/Sybil/tree/train) branch of this repository.
 
-## LDCT Orientation
+# LDCT Orientation
 
 The model expects the input to be an Axial LDCT, where the first frame is of the abdominal region and the last frame is along the clavicles. When the input is of the `dicom` type, the frames will be automatically sorted. However, for `png` inputs, the path of the PNG files must be in the right anatomical order.
 
-## Annotations
+# Annotations
 
 To help train the model, two fellowship-trained thoracic radiologists jointly annotated suspicious lesions on NLST LDCTs using [MD.AI](https://md.ai) software for all participants who developed cancer within 1 year after an LDCT. Each lesion’s volume was marked with bounding boxes on contiguous thin-cut axial images. The “ground truth” annotations were informed by the imaging appearance and the clinical data provided by the NLST, i.e., the series and image number of cancerous nodules and the anatomical location of biopsy-confirmed lung cancers. For these participants, lesions in the location of subsequently diagnosed cancers were also annotated, even if the precursor lesion lacked imaging features specific for cancer.
 
 Annotations are available to download in JSON format [here](https://drive.google.com/drive/folders/1nBp05VV9mf5CfEO6W5RY4ZpcpxmPDEeR?usp=sharing), under the following schema:
 
 ```
 {
   'series1_id': [  # list of annotations for an image
     {
       'box': {
         'x1': {'type': 'float'},
         'x2': {'type': 'float'},
         'y1': {'type': 'float'},
         'y2': {'type': 'float'}
       },
       'image_annotation_id': {'type': 'str'},
       'pixel_start': {'type': 'float'},
       'pixel_spacing': {'type': 'float'}
     }
   ],
   'series2_id': [ ... ]
@@ -78,7 +78,7 @@ Annotations are available to download in JSON format [here](https://drive.google
 }
 ```
 
-## Attention Scores
+# Attention Scores
 
 The multi-attention pooling layer aims to learn the importance of each slice in the 3D volume and the importance of each pixel in the 2D slice. During training, these are supervised by bounding boxes of the cancerous nodules. This is a soft attention mechanism, and the model's primary task is to predict the risk of lung cancer. However, the attention scores can be extracted and used to visualize the model's focus on the 3D volume and the 2D slices.
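
Editor's note: the paragraph above is the README prose behind the `return_attentions` option that patch 2 wires through `predict()`. Below is a minimal sketch of the workflow it describes, assuming the `sybil_ensemble` model and hypothetical DICOM paths; the `visualize_attentions` helper and its keyword arguments follow the call shown in the next hunk but are assumptions, not a verified signature.

```python
from sybil import Serie, Sybil, visualize_attentions  # exact import location of visualize_attentions is assumed

model = Sybil("sybil_ensemble")
serie = Serie(["scan/slice_001.dcm", "scan/slice_002.dcm"])  # hypothetical paths to one axial LDCT series
prediction = model.predict([serie], return_attentions=True)

# prediction.scores holds the risk scores, prediction.attentions the per-slice
# and per-pixel attention maps described in the paragraph above.
series_with_attention = visualize_attentions(
    [serie],
    attentions=prediction.attentions,
    save_directory="attention_output",  # hypothetical output directory
)
```
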
@@ -112,7 +112,13 @@ series_with_attention = visualize_attentions(
 )
 ```
 
-## Cite
+# Training Data
+
+The Sybil model was trained using the National Lung Screening Trial (NLST) dataset:
+
+National Lung Screening Trial Research Team. (2013). Data from the National Lung Screening Trial (NLST) [Data set]. The Cancer Imaging Archive. https://doi.org/10.7937/TCIA.HMQ8-J677
+
+# Cite
 
 ```
 @article{mikhael2023sybil,

From 2bd7fccc3b0e8cece695efe4e3cf778183d9a698 Mon Sep 17 00:00:00 2001
From: Jacob Silterra
Date: Wed, 12 Jun 2024 09:21:56 -0400
Subject: [PATCH 2/3] Set num threads available to torch.

Available as a command line argument, default is the number of CPUs.
Doesn't matter on bare metal but improves performance substantially in containers.
---
 setup.cfg        |  4 ++--
 sybil/model.py   | 23 ++++++++++++++++++++++-
 sybil/predict.py |  9 ++++++++-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index c76970a..fc57e6f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,10 +7,10 @@
 author_email =
 license_file = LICENSE.txt
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
-version = 1.3.0
+version = 1.4.0
 # url =
 project_urls =
-    ; Documentation = https://.../docs
+    Documentation = https://github.com/reginabarzilaygroup/sybil/wiki
     Source = https://github.com/reginabarzilaygroup/sybil
     Tracker = https://github.com/reginabarzilaygroup/sybil/issues

diff --git a/sybil/model.py b/sybil/model.py
index 74899e7..10fb857 100644
--- a/sybil/model.py
+++ b/sybil/model.py
@@ -116,6 +116,21 @@ def download_and_extract(remote_model_url: str, local_model_dir) -> List[str]:
 
     return all_files_and_dirs
 
+def _torch_set_num_threads(threads) -> int:
+    """
+    Set the number of CPU threads for torch to use.
+    Pass a negative number to leave the current setting unchanged (no-op).
+    Pass 0 or None to use the number of CPUs.
+    """
+    if threads is None or threads == 0:
+        threads = os.cpu_count()
+    elif threads < 0:
+        return torch.get_num_threads()
+
+    torch.set_num_threads(threads)
+    return torch.get_num_threads()
+
+
 class Sybil:
     def __init__(
         self,
@@ -294,7 +309,7 @@ def _predict(
         return Prediction(scores=scores, attentions=attentions)
 
     def predict(
-        self, series: Union[Serie, List[Serie]], return_attentions: bool = False
+        self, series: Union[Serie, List[Serie]], return_attentions: bool = False, threads=0,
     ) -> Prediction:
         """Run predictions over the given serie(s) and ensemble
 
@@ -304,6 +319,8 @@ def predict(
             One or multiple series to run predictions for.
         return_attentions : bool
            If True, returns attention scores for each serie. See README for details.
+        threads : int
+            Number of CPU threads to use for PyTorch inference. Default is 0 (use all available cores).
 
         Returns
         -------
@@ -312,6 +329,10 @@ def predict(
 
         """
 
+        # Set CPU threads available to torch
+        num_threads = _torch_set_num_threads(threads)
+        self._logger.debug(f"Using {num_threads} threads for PyTorch inference")
+
         if self._device_flexible:
             self.device = self._pick_device()
             self.to(self.device)

diff --git a/sybil/predict.py b/sybil/predict.py
index 703fb3c..abc2679 100644
--- a/sybil/predict.py
+++ b/sybil/predict.py
@@ -69,6 +69,11 @@ def _get_parser():
 
     parser.add_argument("-l", "--log", "--loglevel", "--log-level", default="INFO", dest="loglevel")
 
+    parser.add_argument('--threads', type=int, default=0,
+                        help="Number of threads to use for PyTorch inference. "
+                             "Default is 0 (use all available cores). "
+                             "Set to a negative number to use the PyTorch default.")
+
     parser.add_argument("-v", "--version", action="version", version=__version__)
 
     return parser
@@ -81,6 +86,7 @@ def predict(
     return_attentions=False,
     write_attention_images=False,
     file_type: Literal["auto", "dicom", "png"] = "auto",
+    threads: int = 0,
 ):
     logger = sybil.utils.logging_utils.get_logger()
 
@@ -115,7 +121,7 @@ def predict(
     # Get risk scores
     serie = Serie(input_files, voxel_spacing=voxel_spacing, file_type=file_type)
     series = [serie]
-    prediction = model.predict(series, return_attentions=return_attentions)
+    prediction = model.predict(series, return_attentions=return_attentions, threads=threads)
     prediction_scores = prediction.scores[0]
 
     logger.debug(f"Prediction finished. Results:\n{prediction_scores}")
@@ -155,6 +161,7 @@ def main():
         args.return_attentions,
         args.write_attention_images,
         file_type=args.file_type,
+        threads=args.threads,
     )
 
     print(json.dumps(pred_dict, indent=2))

From 44130b9b24d8e25937091e7426e374d0debc1d05 Mon Sep 17 00:00:00 2001
From: Jacob Silterra
Date: Wed, 12 Jun 2024 10:01:12 -0400
Subject: [PATCH 3/3] Change demo script name inference -> predict.

---
 scripts/{run_inference_demo.sh => run_predict_demo.sh} | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
 rename scripts/{run_inference_demo.sh => run_predict_demo.sh} (79%)

diff --git a/scripts/run_inference_demo.sh b/scripts/run_predict_demo.sh
similarity index 79%
rename from scripts/run_inference_demo.sh
rename to scripts/run_predict_demo.sh
index 8ac20b2..2d4f455 100755
--- a/scripts/run_inference_demo.sh
+++ b/scripts/run_predict_demo.sh
@@ -13,8 +13,9 @@ if [ ! -d "$demo_scan_dir" ]; then
     unzip -q sybil_example.zip
 fi
 
-# Either python3 sybil/predict.py or sybil-predict (if installed via pip)
-python3 sybil/predict.py \
+# If Sybil is not installed with pip, sybil-predict will not be available.
+# Use "python3 sybil/predict.py" instead.
+sybil-predict \
     --loglevel DEBUG \
     --output-dir demo_prediction \
     --return-attentions \
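
Editor's note: the net effect of patches 2 and 3 is a thread-count control reachable from both the Python API and the renamed demo script. A minimal sketch of the Python side, assuming the `sybil_ensemble` model from the README and hypothetical DICOM paths:

```python
from sybil import Serie, Sybil

# Cap PyTorch at 4 CPU threads for this prediction. threads=0 (the default)
# uses all available cores; a negative value leaves torch's setting untouched.
model = Sybil("sybil_ensemble")
serie = Serie(["scan/slice_001.dcm", "scan/slice_002.dcm"])  # hypothetical DICOM paths
prediction = model.predict([serie], threads=4)
print(prediction.scores[0])  # risk scores for this serie
```

From the command line the same control is exposed as `sybil-predict --threads 4 ...` (or `python3 sybil/predict.py --threads 4 ...`), alongside the existing flags shown in the demo script above.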