From 5c6ce0bbf030fb4ec81396e1cb67edb3e194d383 Mon Sep 17 00:00:00 2001 From: Francisco Rivera Valverde <44504424+franchuterivera@users.noreply.github.com> Date: Thu, 11 Mar 2021 12:44:52 +0100 Subject: [PATCH] FIX_123 (#133) * FIX_123 * Better debug msg * at least 1 config in regression * Return self in _fit() --- autoPyTorch/api/base_task.py | 2 +- .../configs/default_pipeline_options.json | 1 - autoPyTorch/pipeline/base_pipeline.py | 1 - .../training/trainer/base_trainer.py | 3 -- .../training/trainer/base_trainer_choice.py | 33 ++----------------- test/conftest.py | 1 - test/test_api/test_api.py | 2 +- .../test_tabular_classification.py | 7 ++-- test/test_pipeline/test_tabular_regression.py | 2 +- 9 files changed, 11 insertions(+), 41 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 1f60454ff..c0548012d 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -227,7 +227,7 @@ def set_pipeline_config( Args: **pipeline_config_kwargs: Valid config options include "num_run", "device", "budget_type", "epochs", "runtime", "torch_num_threads", - "early_stopping", "use_tensorboard_logger", "use_pynisher", + "early_stopping", "use_tensorboard_logger", "metrics_during_training" Returns: diff --git a/autoPyTorch/configs/default_pipeline_options.json b/autoPyTorch/configs/default_pipeline_options.json index 26b1b879e..c5481080c 100644 --- a/autoPyTorch/configs/default_pipeline_options.json +++ b/autoPyTorch/configs/default_pipeline_options.json @@ -7,6 +7,5 @@ "torch_num_threads": 1, "early_stopping": 20, "use_tensorboard_logger": "False", - "use_pynisher": "False", "metrics_during_training": "True" } diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index a74b530b3..2e70478a3 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -546,6 +546,5 @@ def get_default_pipeline_options() -> Dict[str, Any]: 'torch_num_threads': 1, 'early_stopping': 10, 'use_tensorboard_logger': True, - 'use_pynisher': False, 'metrics_during_training': True } diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index fac404aef..f61d334e2 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -29,9 +29,6 @@ def __init__(self, It also allows to define a 'epoch_or_time' budget type, which means, the first of them both which is exhausted, is honored - - In case use_pynisher is set to false, this function allows to - still terminate the task with a time domain consideration """ self.start_time = time.time() self.budget_type = budget_type diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index f8a76b5e7..564d199dd 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -12,8 +12,6 @@ import numpy as np -import pynisher - import torch from torch.optim import Optimizer from torch.optim.lr_scheduler import _LRScheduler @@ -196,37 +194,16 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, ) - fit_function = self._fit - if X['use_pynisher']: - wall_time_in_s = X['runtime'] if 'runtime' in X else None - memory_limit = X['cpu_memory_limit'] if 'cpu_memory_limit' in X else None - fit_function = pynisher.enforce_limits( - wall_time_in_s=wall_time_in_s, - mem_in_mb=memory_limit, - logger=self.logger - )(self._fit) - # Call the actual fit function. - state_dict = fit_function( + self._fit( X=X, y=y, **kwargs ) - if X['use_pynisher']: - # Normally the X[network] is a pointer to the object, so at the - # end, when we train using X, the pipeline network is updated for free - # If we do multiprocessing (because of pynisher) we have to update - # X[network] manually. we do so in a way that every pipeline component - # can see this new network -- via an update, not overwrite of the pointer - state_dict = state_dict.result - X['network'].load_state_dict(state_dict) - - # TODO: when have the optimizer code, the pynisher object might have failed - # We should process this function as Failure if so trough fit_function.exit_status return cast(autoPyTorchComponent, self.choice) - def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Module: + def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': """ Fits a component by using an input dictionary with pre-requisites @@ -359,7 +336,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Modu # Tag as fitted self.fitted_ = True - return X['network'].state_dict() + return self def early_stop_handler(self, X: Dict[str, Any]) -> bool: """ @@ -444,10 +421,6 @@ def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: raise ValueError('Need a backend to provide the working directory, ' "yet 'backend' was not found in the fit dictionary") - # For resource allocation, we need to know if pynisher is enabled - if 'use_pynisher' not in X: - raise ValueError('To fit a Trainer, expected fit dictionary to have use_pynisher') - # Whether we should evaluate metrics during training or no if 'metrics_during_training' not in X: raise ValueError('Missing metrics_during_training in the fit dictionary') diff --git a/test/conftest.py b/test/conftest.py index 95df27810..04077ca08 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -290,7 +290,6 @@ def get_fit_dictionary(X, y, validator, backend): 'early_stopping': 10, 'working_dir': '/tmp', 'use_tensorboard_logger': True, - 'use_pynisher': False, 'metrics_during_training': True, 'split_id': 0, 'backend': backend, diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 389dd3c95..d30593bb0 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -268,7 +268,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): # Check that smac was able to find proper models succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( ) if 'SUCCESS' in str(run_value.status)] - assert len(succesful_runs) > 1, [(k, v) for k, v in estimator.run_history.data.items()] + assert len(succesful_runs) >= 1, [(k, v) for k, v in estimator.run_history.data.items()] # Search for an existing run key in disc. A individual model might have # a timeout and hence was not written to disc diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 5920568ae..ef508dc7b 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -63,7 +63,10 @@ def test_pipeline_fit(self, fit_dictionary_tabular): cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + try: + pipeline.fit(fit_dictionary_tabular) + except Exception as e: + pytest.fail(f"Failed due to {e} for config={config}") # To make sure we fitted the model, there should be a # run summary object with accuracy @@ -201,7 +204,7 @@ def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - for key in ['num_run', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', 'dataset_properties']: + for key in ['num_run', 'device', 'split_id', 'torch_num_threads', 'dataset_properties']: fit_dictionary_tabular_copy = fit_dictionary_tabular.copy() fit_dictionary_tabular_copy.pop(key) with pytest.raises(ValueError, match=r"To fit .+?, expected fit dictionary to have"): diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 56136befe..c5135522e 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -188,7 +188,7 @@ def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - for key in ['num_run', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', 'dataset_properties']: + for key in ['num_run', 'device', 'split_id', 'torch_num_threads', 'dataset_properties']: fit_dictionary_tabular_copy = fit_dictionary_tabular.copy() fit_dictionary_tabular_copy.pop(key) with pytest.raises(ValueError, match=r"To fit .+?, expected fit dictionary to have"):