From 6e475daafa1fb5c6f2120cc9699f505d65e95336 Mon Sep 17 00:00:00 2001 From: Spiros Maggioros Date: Wed, 9 Oct 2024 14:46:14 +0300 Subject: [PATCH] parallelization ready --- NiChart_DLMUSE/__main__.py | 19 +- NiChart_DLMUSE/utils.py | 43 ++ .../temp_working_dir/s2_dlicv/dataset.json | 11 + .../temp_working_dir/s2_dlicv/plans.json | 434 ++++++++++++++++++ .../s2_dlicv/predict_from_raw_data_args.json | 11 + .../s2_dlicv/renamed_image/renaming.json | 10 + 6 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 output/split_0/temp_working_dir/s2_dlicv/dataset.json create mode 100644 output/split_0/temp_working_dir/s2_dlicv/plans.json create mode 100644 output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json create mode 100644 output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json diff --git a/NiChart_DLMUSE/__main__.py b/NiChart_DLMUSE/__main__.py index 093b294..3243c01 100644 --- a/NiChart_DLMUSE/__main__.py +++ b/NiChart_DLMUSE/__main__.py @@ -7,8 +7,11 @@ import argparse import os +import multiprocessing +import threading from .dlmuse_pipeline import run_pipeline +from .utils import split_data, remove_subfolders # VERSION = pkg_resources.require("NiChart_DLMUSE")[0].version VERSION = 1.0 @@ -110,8 +113,22 @@ def main() -> None: os.system("DLMUSE --clear_cache") # Run pipeline - run_pipeline(in_data, out_dir, device) + no_threads = 4 # for now + subfolders = split_data(in_data, no_threads) + threads = [] + for i in range(len(subfolders)): + curr_out_dir = out_dir + f"split_{i}" + curr_thread = threading.Thread(target=run_pipeline, args=(subfolders[i], curr_out_dir, device)) + curr_thread.start() + threads.append(curr_thread) + + for t in threads: + t.join() + + remove_subfolders(in_data) + + # run_pipeline(in_data, out_dir, device) if __name__ == "__main__": main() diff --git a/NiChart_DLMUSE/utils.py b/NiChart_DLMUSE/utils.py index be618f4..6c07906 100644 --- a/NiChart_DLMUSE/utils.py +++ b/NiChart_DLMUSE/utils.py @@ 
-126,3 +126,76 @@ def make_img_list(in_data: str) -> pd.DataFrame:
 
     # Return out dataframe
     return df_out
+
+def dir_size(in_dir: str) -> int:
+    """
+    Returns the number of regular files directly inside in_dir.
+
+    Sub-directories are neither counted nor descended into.
+    """
+    return sum(
+        1
+        for name in os.listdir(in_dir)
+        if os.path.isfile(os.path.join(in_dir, name))
+    )
+
+
+def split_data(in_dir: str, N: int) -> list:
+    """
+    Splits the files of in_dir into at most N subfolders.
+
+    The regular files found directly inside in_dir are copied, in sorted
+    name order, into subfolders split_1, split_2, ... created inside in_dir.
+    Each subfolder receives ceil(file_count / N) files; the last one
+    receives whatever remains.
+
+    :param in_dir: directory holding the input images
+    :param N: maximum number of subfolders to create, must be positive
+    :return: paths of the created subfolders in creation order, or an empty
+        list when in_dir holds no files
+    :raises ValueError: if N is not positive
+    """
+    # Imported here because shutil may not be imported at module level.
+    import shutil
+
+    if N <= 0:
+        raise ValueError(f"N must be a positive integer, got {N}")
+
+    # Sorted for a reproducible split: os.listdir order is arbitrary.
+    files = sorted(
+        name
+        for name in os.listdir(in_dir)
+        if os.path.isfile(os.path.join(in_dir, name))
+    )
+    if not files:
+        return []
+
+    # Integer ceiling division: files per subfolder.
+    files_per_folder = -(-len(files) // N)
+
+    subfolders = []
+    for start in range(0, len(files), files_per_folder):
+        subfolder = os.path.join(in_dir, f"split_{len(subfolders) + 1}")
+        os.makedirs(subfolder, exist_ok=True)
+        for name in files[start:start + files_per_folder]:
+            shutil.copy(os.path.join(in_dir, name), subfolder)
+        # Appended for every chunk, so the last subfolder is returned too.
+        subfolders.append(subfolder)
+
+    return subfolders
+
+
+def remove_subfolders(in_dir: str) -> None:
+    """
+    Removes every split_* subfolder of in_dir together with its contents.
+
+    Only directories are removed; regular files whose name starts with
+    split_ are left untouched.
+    """
+    # Imported here because shutil may not be imported at module level.
+    import shutil
+
+    for name in os.listdir(in_dir):
+        path = os.path.join(in_dir, name)
+        if name.startswith("split_") and os.path.isdir(path):
+            shutil.rmtree(path)
diff --git a/output/split_0/temp_working_dir/s2_dlicv/dataset.json b/output/split_0/temp_working_dir/s2_dlicv/dataset.json
new file mode 100644
index 0000000..c39f8d0
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/dataset.json
@@ -0,0 +1,11 @@
+{
+    "channel_names": {
+        "0": "MRI"
+    },
+    "file_ending": ".nii.gz",
+    "labels": {
+        "background": 0,
+        "class1": 1
+    },
+    "numTraining": 1806
+}
\ No newline at end of file
diff --git a/output/split_0/temp_working_dir/s2_dlicv/plans.json b/output/split_0/temp_working_dir/s2_dlicv/plans.json
new file mode 100644
index 0000000..0849bf1
--- /dev/null
+++ b/output/split_0/temp_working_dir/s2_dlicv/plans.json
@@ -0,0 +1,434 @@
+{
+    "dataset_name": "Dataset901_Task901_dlicv",
+    "plans_name": "nnUNetPlans",
+
"original_median_spacing_after_transp": [ + 1.0, + 1.0, + 1.0 + ], + "original_median_shape_after_transp": [ + 255, + 255, + 176 + ], + "image_reader_writer": "SimpleITKIO", + "transpose_forward": [ + 0, + 1, + 2 + ], + "transpose_backward": [ + 0, + 1, + 2 + ], + "configurations": { + "2d": { + "data_identifier": "nnUNetPlans_2d", + "preprocessor_name": "DefaultPreprocessor", + "batch_size": 66, + "patch_size": [ + 256, + 192 + ], + "median_image_size_in_voxels": [ + 240.0, + 179.0 + ], + "spacing": [ + 1.0, + 1.0 + ], + "normalization_schemes": [ + "ZScoreNormalization" + ], + "use_mask_for_norm": [ + false + ], + "UNet_class_name": "PlainConvUNet", + "UNet_base_num_features": 32, + "n_conv_per_stage_encoder": [ + 2, + 2, + 2, + 2, + 2, + 2 + ], + "n_conv_per_stage_decoder": [ + 2, + 2, + 2, + 2, + 2 + ], + "num_pool_per_axis": [ + 5, + 5 + ], + "pool_op_kernel_sizes": [ + [ + 1, + 1 + ], + [ + 2, + 2 + ], + [ + 2, + 2 + ], + [ + 2, + 2 + ], + [ + 2, + 2 + ], + [ + 2, + 2 + ] + ], + "conv_kernel_sizes": [ + [ + 3, + 3 + ], + [ + 3, + 3 + ], + [ + 3, + 3 + ], + [ + 3, + 3 + ], + [ + 3, + 3 + ], + [ + 3, + 3 + ] + ], + "unet_max_num_features": 512, + "resampling_fn_data": "resample_data_or_seg_to_shape", + "resampling_fn_seg": "resample_data_or_seg_to_shape", + "resampling_fn_data_kwargs": { + "is_seg": false, + "order": 3, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_seg_kwargs": { + "is_seg": true, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_probabilities": "resample_data_or_seg_to_shape", + "resampling_fn_probabilities_kwargs": { + "is_seg": false, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "batch_dice": true + }, + "3d_lowres": { + "data_identifier": "nnUNetPlans_3d_lowres", + "preprocessor_name": "DefaultPreprocessor", + "batch_size": 2, + "patch_size": [ + 160, + 128, + 112 + ], + "median_image_size_in_voxels": [ + 235, + 226, + 169 + ], + "spacing": [ + 1.0609, + 1.0609, + 1.0609 + 
], + "normalization_schemes": [ + "ZScoreNormalization" + ], + "use_mask_for_norm": [ + false + ], + "UNet_class_name": "PlainConvUNet", + "UNet_base_num_features": 32, + "n_conv_per_stage_encoder": [ + 2, + 2, + 2, + 2, + 2, + 2 + ], + "n_conv_per_stage_decoder": [ + 2, + 2, + 2, + 2, + 2 + ], + "num_pool_per_axis": [ + 5, + 5, + 4 + ], + "pool_op_kernel_sizes": [ + [ + 1, + 1, + 1 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 1 + ] + ], + "conv_kernel_sizes": [ + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ] + ], + "unet_max_num_features": 320, + "resampling_fn_data": "resample_data_or_seg_to_shape", + "resampling_fn_seg": "resample_data_or_seg_to_shape", + "resampling_fn_data_kwargs": { + "is_seg": false, + "order": 3, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_seg_kwargs": { + "is_seg": true, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_probabilities": "resample_data_or_seg_to_shape", + "resampling_fn_probabilities_kwargs": { + "is_seg": false, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "batch_dice": false, + "next_stage": "3d_cascade_fullres" + }, + "3d_fullres": { + "data_identifier": "nnUNetPlans_3d_fullres", + "preprocessor_name": "DefaultPreprocessor", + "batch_size": 2, + "patch_size": [ + 160, + 128, + 112 + ], + "median_image_size_in_voxels": [ + 249.0, + 240.0, + 179.0 + ], + "spacing": [ + 1.0, + 1.0, + 1.0 + ], + "normalization_schemes": [ + "ZScoreNormalization" + ], + "use_mask_for_norm": [ + false + ], + "UNet_class_name": "PlainConvUNet", + "UNet_base_num_features": 32, + "n_conv_per_stage_encoder": [ + 2, + 2, + 2, + 2, + 2, + 2 + ], + "n_conv_per_stage_decoder": [ + 2, + 2, + 2, + 2, + 2 + ], + "num_pool_per_axis": [ + 5, + 5, + 4 + ], + "pool_op_kernel_sizes": [ + [ + 1, + 1, + 1 + ], + [ + 2, + 2, + 2 + ], + [ + 
2, + 2, + 2 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 2 + ], + [ + 2, + 2, + 1 + ] + ], + "conv_kernel_sizes": [ + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ], + [ + 3, + 3, + 3 + ] + ], + "unet_max_num_features": 320, + "resampling_fn_data": "resample_data_or_seg_to_shape", + "resampling_fn_seg": "resample_data_or_seg_to_shape", + "resampling_fn_data_kwargs": { + "is_seg": false, + "order": 3, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_seg_kwargs": { + "is_seg": true, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "resampling_fn_probabilities": "resample_data_or_seg_to_shape", + "resampling_fn_probabilities_kwargs": { + "is_seg": false, + "order": 1, + "order_z": 0, + "force_separate_z": null + }, + "batch_dice": true + }, + "3d_cascade_fullres": { + "inherits_from": "3d_fullres", + "previous_stage": "3d_lowres" + } + }, + "experiment_planner_used": "ExperimentPlanner", + "label_manager": "LabelManager", + "foreground_intensity_properties_per_channel": { + "0": { + "max": 8952.697265625, + "mean": 528.2257080078125, + "median": 317.0, + "min": -24.0, + "percentile_00_5": 20.0, + "percentile_99_5": 3972.0, + "std": 636.9642333984375 + } + } +} \ No newline at end of file diff --git a/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json b/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json new file mode 100644 index 0000000..e17a9a9 --- /dev/null +++ b/output/split_0/temp_working_dir/s2_dlicv/predict_from_raw_data_args.json @@ -0,0 +1,11 @@ +{ + "folder_with_segs_from_prev_stage": null, + "list_of_lists_or_source_folder": "/Users/spirosmag/Documents/NiChart_DLMUSE/output/split_0/temp_working_dir/s2_dlicv/renamed_image", + "num_parts": 1, + "num_processes_preprocessing": 2, + "num_processes_segmentation_export": 2, + "output_folder_or_list_of_truncated_output_files": 
"/Users/spirosmag/Documents/NiChart_DLMUSE/output/split_0/temp_working_dir/s2_dlicv", + "overwrite": true, + "part_id": 0, + "save_probabilities": false +} \ No newline at end of file diff --git a/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json b/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json new file mode 100644 index 0000000..018a8a9 --- /dev/null +++ b/output/split_0/temp_working_dir/s2_dlicv/renamed_image/renaming.json @@ -0,0 +1,10 @@ +{ + "IXI114-Guys-0737-T1_LPS.nii.gz": "case_ 000_0000.nii.gz", + "IXI100-Guys-0747-T1_LPS.nii.gz": "case_ 001_0000.nii.gz", + "IXI107-Guys-0761-T1_LPS.nii.gz": "case_ 002_0000.nii.gz", + "IXI101-Guys-0749-T1_LPS.nii.gz": "case_ 003_0000.nii.gz", + "IXI126-HH-1437-T1_LPS.nii.gz": "case_ 004_0000.nii.gz", + "IXI106-Guys-0760-T1_LPS.nii.gz": "case_ 005_0000.nii.gz", + "IXI105-HH-1471-T1_LPS.nii.gz": "case_ 006_0000.nii.gz", + "IXI104-HH-1450-T1_LPS.nii.gz": "case_ 007_0000.nii.gz" +} \ No newline at end of file