From 9d3a3b684a100b5d47012b2184d1fdfa36ce0d1a Mon Sep 17 00:00:00 2001 From: StevieSong Date: Mon, 15 Jun 2020 16:17:45 -0400 Subject: [PATCH 1/3] #260 #324 #326 --- ml4cvd/tensor_generators.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ml4cvd/tensor_generators.py b/ml4cvd/tensor_generators.py index 1a38b1cc4..e372a8b3a 100755 --- a/ml4cvd/tensor_generators.py +++ b/ml4cvd/tensor_generators.py @@ -90,12 +90,12 @@ def __init__( self._started = False self.workers = [] self.worker_instances = [] + if num_workers == 0: + num_workers = 1 # The one worker is the main thread self.batch_size, self.input_maps, self.output_maps, self.num_workers, self.cache_size, self.weights, self.name, self.keep_paths = \ batch_size, input_maps, output_maps, num_workers, cache_size, weights, name, keep_paths self.true_epochs = 0 self.stats_string = "" - if num_workers == 0: - num_workers = 1 # The one worker is the main thread if weights is None: worker_paths = np.array_split(paths, num_workers) self.true_epoch_lens = list(map(len, worker_paths)) @@ -148,7 +148,7 @@ def _init_workers(self): ) process.start() self.workers.append(process) - logging.info(f"Started {i} {self.name.replace('_', ' ')}s with cache size {self.cache_size/1e9}GB.") + logging.info(f"Started {i + 1} {self.name.replace('_', ' ')}s with cache size {self.cache_size/1e9}GB.") def set_worker_paths(self, paths: List[Path]): """In the single worker case, set the worker's paths.""" @@ -227,7 +227,7 @@ def aggregate_and_print_stats(self): f"{stats['Tensors presented']:0.0f} tensors were presented.", f"{stats['skipped_paths']} paths were skipped because they previously failed.", f"{error_info}", - f"{self.stats_string}" + f"{self.stats_string}", ]) logging.info(f"\n!!!!>~~~~~~~~~~~~ {self.name} completed true epoch {self.true_epochs} ~~~~~~~~~~~~ Date: Mon, 15 Jun 2020 17:17:13 -0400 Subject: [PATCH 2/3] get stats q --- ml4cvd/tensor_generators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml4cvd/tensor_generators.py b/ml4cvd/tensor_generators.py index e372a8b3a..2c85b8f99 100755 --- a/ml4cvd/tensor_generators.py +++ b/ml4cvd/tensor_generators.py @@ -161,11 +161,11 @@ def set_worker_paths(self, paths: List[Path]): def __next__(self) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], Optional[List[str]]]: if not self._started: self._init_workers() + if self.stats_q.qsize() == self.num_workers: + self.aggregate_and_print_stats() if self.run_on_main_thread: return next(self.worker_instances[0]) else: - if self.stats_q.qsize() == self.num_workers: - self.aggregate_and_print_stats() return self.q.get(TENSOR_GENERATOR_TIMEOUT) def aggregate_and_print_stats(self): From 8580ad0e1775ab5f29e8cf1cd9a1bf706b2906a0 Mon Sep 17 00:00:00 2001 From: StevieSong Date: Thu, 18 Jun 2020 17:01:33 -0400 Subject: [PATCH 3/3] variable names --- ml4cvd/tensor_generators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ml4cvd/tensor_generators.py b/ml4cvd/tensor_generators.py index 2c85b8f99..840ae4c0c 100755 --- a/ml4cvd/tensor_generators.py +++ b/ml4cvd/tensor_generators.py @@ -789,10 +789,10 @@ def test_train_valid_tensor_generators( ) weights = None - train_workers = int(training_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0) - valid_workers = int(validation_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0) - generate_train = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, train_paths, train_workers, cache_size, weights, keep_paths, mixup_alpha, name='train_worker', siamese=siamese, augment=True, sample_weight=sample_weight) - generate_valid = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, valid_paths, valid_workers, cache_size, weights, keep_paths, name='validation_worker', siamese=siamese, augment=False) + num_train_workers = int(training_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0) + num_valid_workers = int(validation_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0) + generate_train = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, train_paths, num_train_workers, cache_size, weights, keep_paths, mixup_alpha, name='train_worker', siamese=siamese, augment=True, sample_weight=sample_weight) + generate_valid = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, valid_paths, num_valid_workers, cache_size, weights, keep_paths, name='validation_worker', siamese=siamese, augment=False) generate_test = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, test_paths, num_workers, 0, weights, keep_paths or keep_paths_test, name='test_worker', siamese=siamese, augment=False) return generate_train, generate_valid, generate_test