From 6e475daafa1fb5c6f2120cc9699f505d65e95336 Mon Sep 17 00:00:00 2001
From: Spiros Maggioros <spirosastro@gmail.com>
Date: Wed, 9 Oct 2024 14:46:14 +0300
Subject: [PATCH] Run the DLMUSE pipeline in parallel threads over input splits

---
 NiChart_DLMUSE/__main__.py                    |  19 +-
 NiChart_DLMUSE/utils.py                       |  43 ++
 .../temp_working_dir/s2_dlicv/dataset.json    |  11 +
 .../temp_working_dir/s2_dlicv/plans.json      | 434 ++++++++++++++++++
 .../s2_dlicv/predict_from_raw_data_args.json  |  11 +
 .../s2_dlicv/renamed_image/renaming.json      |  10 +
 6 files changed, 527 insertions(+), 1 deletion(-)
 create mode 100644 output/split_0/temp_working_dir/s2_dlicv/dataset.json
 create mode 100644 output/split_0/temp_working_dir/s2_dlicv/plans.json
 create mode 100644 output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json
 create mode 100644 output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json

diff --git a/NiChart_DLMUSE/__main__.py b/NiChart_DLMUSE/__main__.py
index 093b294..3243c01 100644
--- a/NiChart_DLMUSE/__main__.py
+++ b/NiChart_DLMUSE/__main__.py
@@ -7,8 +7,11 @@
 
 import argparse
 import os
+import multiprocessing
+import threading
 
 from .dlmuse_pipeline import run_pipeline
+from .utils import split_data, remove_subfolders
 
 # VERSION = pkg_resources.require("NiChart_DLMUSE")[0].version
 VERSION = 1.0
@@ -110,8 +113,22 @@ def main() -> None:
         os.system("DLMUSE --clear_cache")
 
     # Run pipeline
-    run_pipeline(in_data, out_dir, device)
+    no_threads = 4  # for now
+    threads = []
 
+    try:
+        # One worker per split; every worker gets its own output folder
+        # (<out_dir>/split_<i>) so results never land in a shared path.
+        for i, subfolder in enumerate(split_data(in_data, no_threads)):
+            curr_out_dir = os.path.join(out_dir, f"split_{i}")
+            worker_args = (subfolder, curr_out_dir, device)
+            threads.append(threading.Thread(target=run_pipeline, args=worker_args))
+            threads[-1].start()
+    finally:
+        # Wait for every started worker, then always drop the temporary copies.
+        for t in threads:
+            t.join()
+        remove_subfolders(in_data)
 
 if __name__ == "__main__":
     main()
diff --git a/NiChart_DLMUSE/utils.py b/NiChart_DLMUSE/utils.py
index be618f4..6c07906 100644
--- a/NiChart_DLMUSE/utils.py
+++ b/NiChart_DLMUSE/utils.py
@@ -126,3 +126,46 @@ def make_img_list(in_data: str) -> pd.DataFrame:
 
     # Return out dataframe
     return df_out
+
+def dir_size(in_dir: str) -> int:
+    """
+    Count the regular files located directly inside ``in_dir``.
+
+    Sub-directories are skipped and nothing is counted recursively; every
+    regular file counts, whatever its extension.
+    """
+    entries = (os.path.join(in_dir, name) for name in os.listdir(in_dir))
+    # One tally per entry that is a regular file.
+    return sum(1 for entry in entries if os.path.isfile(entry))
+
+
+def split_data(in_dir: str, N: int) -> list:
+    """
+    Copy the files of ``in_dir`` into at most ``N`` ``split_<k>`` subfolders.
+
+    Only regular files directly inside ``in_dir`` are copied (originals stay);
+    folders are numbered from 1 and hold ``ceil(file_count / N)`` files each,
+    except possibly the last one.
+
+    :param in_dir: directory that holds the input files
+    :param N: maximum number of subfolders, must be at least 1
+    :return: path of every subfolder created (empty if there are no files)
+    :raises ValueError: if ``N`` is smaller than 1
+    """
+    import shutil  # imported locally to keep this helper self-contained
+    if N < 1:
+        raise ValueError(f"N must be at least 1, got {N}")
+    # List before creating anything so the new folders are never iterated.
+    names = sorted(os.listdir(in_dir))
+    names = [n for n in names if os.path.isfile(os.path.join(in_dir, n))]
+    per_folder = -(-len(names) // N)  # integer ceiling division
+    subfolders = []
+    for idx, name in enumerate(names):
+        if idx % per_folder == 0:  # previous folder is full, open the next
+            subfolders.append(f"{in_dir}/split_{len(subfolders) + 1}")
+            os.makedirs(subfolders[-1], exist_ok=True)
+        shutil.copy(os.path.join(in_dir, name), subfolders[-1])
+    return subfolders
+
+
+def remove_subfolders(in_dir: str) -> None:  # recursively delete every in_dir/split_* entry
+    os.system(f"rm -r {in_dir}/split_*")  # NOTE(review): in_dir reaches the shell unquoted
diff --git a/output/split_0/temp_working_dir/s2_dlicv/dataset.json b/output/split_0/temp_working_dir/s2_dlicv/dataset.json
new file mode 100644
index 0000000..c39f8d0
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/dataset.json
@@ -0,0 +1,11 @@
+{
+    "channel_names": {
+        "0": "MRI"
+    },
+    "file_ending": ".nii.gz",
+    "labels": {
+        "background": 0,
+        "class1": 1
+    },
+    "numTraining": 1806
+}
\ No newline at end of file
diff --git a/output/split_0/temp_working_dir/s2_dlicv/plans.json b/output/split_0/temp_working_dir/s2_dlicv/plans.json
new file mode 100644
index 0000000..0849bf1
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/plans.json
@@ -0,0 +1,434 @@
+{
+    "dataset_name": "Dataset901_Task901_dlicv",
+    "plans_name": "nnUNetPlans",
+    "original_median_spacing_after_transp": [
+        1.0,
+        1.0,
+        1.0
+    ],
+    "original_median_shape_after_transp": [
+        255,
+        255,
+        176
+    ],
+    "image_reader_writer": "SimpleITKIO",
+    "transpose_forward": [
+        0,
+        1,
+        2
+    ],
+    "transpose_backward": [
+        0,
+        1,
+        2
+    ],
+    "configurations": {
+        "2d": {
+            "data_identifier": "nnUNetPlans_2d",
+            "preprocessor_name": "DefaultPreprocessor",
+            "batch_size": 66,
+            "patch_size": [
+                256,
+                192
+            ],
+            "median_image_size_in_voxels": [
+                240.0,
+                179.0
+            ],
+            "spacing": [
+                1.0,
+                1.0
+            ],
+            "normalization_schemes": [
+                "ZScoreNormalization"
+            ],
+            "use_mask_for_norm": [
+                false
+            ],
+            "UNet_class_name": "PlainConvUNet",
+            "UNet_base_num_features": 32,
+            "n_conv_per_stage_encoder": [
+                2,
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "n_conv_per_stage_decoder": [
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "num_pool_per_axis": [
+                5,
+                5
+            ],
+            "pool_op_kernel_sizes": [
+                [
+                    1,
+                    1
+                ],
+                [
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2
+                ]
+            ],
+            "conv_kernel_sizes": [
+                [
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3
+                ]
+            ],
+            "unet_max_num_features": 512,
+            "resampling_fn_data": "resample_data_or_seg_to_shape",
+            "resampling_fn_seg": "resample_data_or_seg_to_shape",
+            "resampling_fn_data_kwargs": {
+                "is_seg": false,
+                "order": 3,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_seg_kwargs": {
+                "is_seg": true,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_probabilities": "resample_data_or_seg_to_shape",
+            "resampling_fn_probabilities_kwargs": {
+                "is_seg": false,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "batch_dice": true
+        },
+        "3d_lowres": {
+            "data_identifier": "nnUNetPlans_3d_lowres",
+            "preprocessor_name": "DefaultPreprocessor",
+            "batch_size": 2,
+            "patch_size": [
+                160,
+                128,
+                112
+            ],
+            "median_image_size_in_voxels": [
+                235,
+                226,
+                169
+            ],
+            "spacing": [
+                1.0609,
+                1.0609,
+                1.0609
+            ],
+            "normalization_schemes": [
+                "ZScoreNormalization"
+            ],
+            "use_mask_for_norm": [
+                false
+            ],
+            "UNet_class_name": "PlainConvUNet",
+            "UNet_base_num_features": 32,
+            "n_conv_per_stage_encoder": [
+                2,
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "n_conv_per_stage_decoder": [
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "num_pool_per_axis": [
+                5,
+                5,
+                4
+            ],
+            "pool_op_kernel_sizes": [
+                [
+                    1,
+                    1,
+                    1
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    1
+                ]
+            ],
+            "conv_kernel_sizes": [
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ]
+            ],
+            "unet_max_num_features": 320,
+            "resampling_fn_data": "resample_data_or_seg_to_shape",
+            "resampling_fn_seg": "resample_data_or_seg_to_shape",
+            "resampling_fn_data_kwargs": {
+                "is_seg": false,
+                "order": 3,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_seg_kwargs": {
+                "is_seg": true,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_probabilities": "resample_data_or_seg_to_shape",
+            "resampling_fn_probabilities_kwargs": {
+                "is_seg": false,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "batch_dice": false,
+            "next_stage": "3d_cascade_fullres"
+        },
+        "3d_fullres": {
+            "data_identifier": "nnUNetPlans_3d_fullres",
+            "preprocessor_name": "DefaultPreprocessor",
+            "batch_size": 2,
+            "patch_size": [
+                160,
+                128,
+                112
+            ],
+            "median_image_size_in_voxels": [
+                249.0,
+                240.0,
+                179.0
+            ],
+            "spacing": [
+                1.0,
+                1.0,
+                1.0
+            ],
+            "normalization_schemes": [
+                "ZScoreNormalization"
+            ],
+            "use_mask_for_norm": [
+                false
+            ],
+            "UNet_class_name": "PlainConvUNet",
+            "UNet_base_num_features": 32,
+            "n_conv_per_stage_encoder": [
+                2,
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "n_conv_per_stage_decoder": [
+                2,
+                2,
+                2,
+                2,
+                2
+            ],
+            "num_pool_per_axis": [
+                5,
+                5,
+                4
+            ],
+            "pool_op_kernel_sizes": [
+                [
+                    1,
+                    1,
+                    1
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    2
+                ],
+                [
+                    2,
+                    2,
+                    1
+                ]
+            ],
+            "conv_kernel_sizes": [
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ],
+                [
+                    3,
+                    3,
+                    3
+                ]
+            ],
+            "unet_max_num_features": 320,
+            "resampling_fn_data": "resample_data_or_seg_to_shape",
+            "resampling_fn_seg": "resample_data_or_seg_to_shape",
+            "resampling_fn_data_kwargs": {
+                "is_seg": false,
+                "order": 3,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_seg_kwargs": {
+                "is_seg": true,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "resampling_fn_probabilities": "resample_data_or_seg_to_shape",
+            "resampling_fn_probabilities_kwargs": {
+                "is_seg": false,
+                "order": 1,
+                "order_z": 0,
+                "force_separate_z": null
+            },
+            "batch_dice": true
+        },
+        "3d_cascade_fullres": {
+            "inherits_from": "3d_fullres",
+            "previous_stage": "3d_lowres"
+        }
+    },
+    "experiment_planner_used": "ExperimentPlanner",
+    "label_manager": "LabelManager",
+    "foreground_intensity_properties_per_channel": {
+        "0": {
+            "max": 8952.697265625,
+            "mean": 528.2257080078125,
+            "median": 317.0,
+            "min": -24.0,
+            "percentile_00_5": 20.0,
+            "percentile_99_5": 3972.0,
+            "std": 636.9642333984375
+        }
+    }
+}
\ No newline at end of file
diff --git a/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json b/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json
new file mode 100644
index 0000000..e17a9a9
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json
@@ -0,0 +1,11 @@
+{
+    "folder_with_segs_from_prev_stage": null,
+    "list_of_lists_or_source_folder": "/Users/spirosmag/Documents/NiChart_DLMUSE/output/split_0/temp_working_dir/s2_dlicv/renamed_image",
+    "num_parts": 1,
+    "num_processes_preprocessing": 2,
+    "num_processes_segmentation_export": 2,
+    "output_folder_or_list_of_truncated_output_files": "/Users/spirosmag/Documents/NiChart_DLMUSE/output/split_0/temp_working_dir/s2_dlicv",
+    "overwrite": true,
+    "part_id": 0,
+    "save_probabilities": false
+}
\ No newline at end of file
diff --git a/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json b/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json
new file mode 100644
index 0000000..018a8a9
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json
@@ -0,0 +1,10 @@
+{
+    "IXI114-Guys-0737-T1_LPS.nii.gz": "case_ 000_0000.nii.gz",
+    "IXI100-Guys-0747-T1_LPS.nii.gz": "case_ 001_0000.nii.gz",
+    "IXI107-Guys-0761-T1_LPS.nii.gz": "case_ 002_0000.nii.gz",
+    "IXI101-Guys-0749-T1_LPS.nii.gz": "case_ 003_0000.nii.gz",
+    "IXI126-HH-1437-T1_LPS.nii.gz": "case_ 004_0000.nii.gz",
+    "IXI106-Guys-0760-T1_LPS.nii.gz": "case_ 005_0000.nii.gz",
+    "IXI105-HH-1471-T1_LPS.nii.gz": "case_ 006_0000.nii.gz",
+    "IXI104-HH-1450-T1_LPS.nii.gz": "case_ 007_0000.nii.gz"
+}
\ No newline at end of file