From a11a2e4028ed0620ed1674f9cd17a71e01eaead5 Mon Sep 17 00:00:00 2001 From: John Wilkie <124276291+JBWilkie@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:18:41 +0000 Subject: [PATCH] Identified and re-named all multi-processed functions vs. multi-threaded ones (#771) --- darwin/dataset/local_dataset.py | 6 +++--- darwin/dataset/remote_dataset.py | 6 +++--- darwin/dataset/utils.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py index b2c4dff89..2286fb1a8 100644 --- a/darwin/dataset/local_dataset.py +++ b/darwin/dataset/local_dataset.py @@ -350,14 +350,14 @@ def annotation_type_supported(self, annotation) -> bool: ) def measure_mean_std( - self, multi_threaded: bool = True + self, multi_processed: bool = True ) -> Tuple[np.ndarray, np.ndarray]: """ Computes mean and std of trained images, given the train loader. Parameters ---------- - multi_threaded : bool, default: True + multi_processed : bool, default: True Uses multiprocessing to download the dataset in parallel. Returns @@ -367,7 +367,7 @@ def measure_mean_std( std : ndarray[double] Standard deviation (for each channel) of all pixels of the images in the input folder. """ - if multi_threaded: + if multi_processed: # Set up a pool of workers with mp.Pool(mp.cpu_count()) as pool: # Online mean diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 7f9cc0467..9f7d967c5 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -183,7 +183,7 @@ def pull( *, release: Optional[Release] = None, blocking: bool = True, - multi_threaded: bool = True, + multi_processed: bool = True, only_annotations: bool = False, force_replace: bool = False, remove_extra: bool = False, @@ -203,7 +203,7 @@ def pull( The release to pull. blocking : bool, default: True If False, the dataset is not downloaded and a generator function is returned instead. - multi_threaded : bool, default: True + multi_processed : bool, default: True Uses multiprocessing to download the dataset in parallel. If blocking is False this has no effect. only_annotations : bool, default: False Download only the annotations and no corresponding images. @@ -364,7 +364,7 @@ def pull( successes, errors = exhaust_generator( progress=progress(), count=count, - multi_threaded=multi_threaded, + multi_processed=multi_processed, worker_count=max_workers, ) if errors: diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index 7aa3fa1bc..7e9ecfee9 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -223,12 +223,12 @@ def _f(x: Any) -> Any: def exhaust_generator( progress: Generator, count: int, - multi_threaded: bool, + multi_processed: bool, worker_count: Optional[int] = None, ) -> Tuple[List[Dict[str, Any]], List[Exception]]: """ - Exhausts the generator passed as parameter. Can be done multi threaded if desired. + Exhausts the generator passed as parameter. Can be done multi processed if desired. Creates and returns a coco record from the given annotation. Uses ``BoxMode.XYXY_ABS`` from ``detectron2.structures`` if available, defaults to ``box_mode = 0`` @@ -260,7 +260,7 @@ def exhaust_generator( """ successes = [] errors = [] - if multi_threaded: + if multi_processed: progress_bar: ProgressBar = ProgressBar(total=count) responses = []