From 4a298527c5f256ceb6999a61a28d451f6251f2ce Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Tue, 9 Nov 2021 13:49:38 +0100 Subject: [PATCH] [FIX] Minor Fixes (#306) * added the minor fixes from reg_cocktail * fixes to dropout, removing unnecessary lines * fix in data loader * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * fix tests with dropout * Fix tests Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/api/base_task.py | 60 +++++++++++-------- autoPyTorch/api/tabular_classification.py | 10 ++-- autoPyTorch/api/tabular_regression.py | 6 +- .../network_backbone/ShapedResNetBackbone.py | 18 +++--- .../setup/network_backbone/utils.py | 10 ++-- .../setup/network_head/fully_connected.py | 33 ++++++---- .../setup/network_initializer/XavierInit.py | 2 +- .../training/data_loader/base_data_loader.py | 12 +++- .../pipeline/tabular_classification.py | 15 +---- .../components/setup/test_setup.py | 10 ++-- 10 files changed, 101 insertions(+), 75 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 2ab5650b1..270240813 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -36,7 +36,7 @@ STRING_TO_TASK_TYPES, ) from autoPyTorch.data.base_validator import BaseInputValidator -from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.base_dataset import BaseDataset, BaseDatasetPropertiesType from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager from autoPyTorch.ensemble.singlebest_ensemble import SingleBest @@ -105,6 +105,8 @@ class BaseTask: Args: seed (int), (default=1): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. + n_threads (int), (default=1): + number of threads to use for each process.
logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): Number of models added to the ensemble built by @@ -133,6 +135,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -151,6 +154,7 @@ def __init__( ) -> None: self.seed = seed self.n_jobs = n_jobs + self.n_threads = n_threads self.ensemble_size = ensemble_size self.ensemble_nbest = ensemble_nbest self.max_models_on_disc = max_models_on_disc @@ -1064,6 +1068,28 @@ def _search( return self + def _get_fit_dictionary( + self, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + dataset: BaseDataset, + split_id: int = 0 + ) -> Dict[str, Any]: + X_test = dataset.test_tensors[0].copy() if dataset.test_tensors is not None else None + y_test = dataset.test_tensors[1].copy() if dataset.test_tensors is not None else None + X: Dict[str, Any] = dict({'dataset_properties': dataset_properties, + 'backend': self._backend, + 'X_train': dataset.train_tensors[0].copy(), + 'y_train': dataset.train_tensors[1].copy(), + 'X_test': X_test, + 'y_test': y_test, + 'train_indices': dataset.splits[split_id][0], + 'val_indices': dataset.splits[split_id][1], + 'split_id': split_id, + 'num_run': self._backend.get_next_num_run(), + }) + X.update(self.pipeline_options) + return X + def refit( self, dataset: BaseDataset, @@ -1107,18 +1133,6 @@ def refit( dataset_properties = dataset.get_dataset_properties(dataset_requirements) self._backend.save_datamanager(dataset) - X: Dict[str, Any] = dict({'dataset_properties': dataset_properties, - 'backend': self._backend, - 'X_train': dataset.train_tensors[0], - 'y_train': dataset.train_tensors[1], - 'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None, - 'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None, - 'train_indices': dataset.splits[split_id][0], - 'val_indices': dataset.splits[split_id][1], - 'split_id': split_id, - 'num_run': self._backend.get_next_num_run(), - }) - X.update(self.pipeline_options) if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: self._load_models() @@ -1134,6 +1148,10 @@ def refit( # try to fit the model. If it fails, shuffle the data. This # could alleviate the problem in algorithms that depend on # the ordering of the data. 
+ X = self._get_fit_dictionary( + dataset_properties=dataset_properties, + dataset=dataset, + split_id=split_id) fit_and_suppress_warnings(self._logger, model, X, y=None) self._clean_logger() @@ -1187,18 +1205,10 @@ def fit(self, pipeline.set_hyperparameters(pipeline_config) # initialise fit dictionary - X: Dict[str, Any] = dict({'dataset_properties': dataset_properties, - 'backend': self._backend, - 'X_train': dataset.train_tensors[0], - 'y_train': dataset.train_tensors[1], - 'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None, - 'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None, - 'train_indices': dataset.splits[split_id][0], - 'val_indices': dataset.splits[split_id][1], - 'split_id': split_id, - 'num_run': self._backend.get_next_num_run(), - }) - X.update(self.pipeline_options) + X = self._get_fit_dictionary( + dataset_properties=dataset_properties, + dataset=dataset, + split_id=split_id) fit_and_suppress_warnings(self._logger, pipeline, X, y=None) diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 20be4346d..659e50c68 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -26,10 +26,10 @@ class TabularClassificationTask(BaseTask): """ Tabular Classification API to the pipelines. Args: - seed (int): - seed to be used for reproducibility. - n_jobs (int), (default=1): - number of consecutive processes to spawn. + seed (int), (default=1): seed to be used for reproducibility. + n_jobs (int), (default=1): number of consecutive processes to spawn. + n_threads (int), (default=1): + number of threads to use for each process. logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): @@ -64,6 +64,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -82,6 +83,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, + n_threads=n_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index b88bf7cd9..e4064461d 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -26,8 +26,10 @@ class TabularRegressionTask(BaseTask): """ Tabular Regression API to the pipelines. Args: - seed (int): seed to be used for reproducibility. + seed (int), (default=1): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. + n_threads (int), (default=1): + number of threads to use for each process. 
logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): Number of models added to the ensemble built by @@ -56,6 +58,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -74,6 +77,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, + n_threads=n_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 7886ab827..efe09141d 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -39,19 +39,19 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> torch.nn.Sequential: self.config.update( {"num_units_%d" % (i): num for i, num in enumerate(neuron_counts)} ) - # we are skipping the last layer, as the function get_shaped_neuron_counts - # is built for getting neuron counts, so it will add the out_features to - # the last layer. However, in dropout we dont want to have that, we just - # want to use the shape and not worry about the output. if self.config['use_dropout']: + # the last dropout ("neuron") value is skipped since it will be equal + # to out_feat, which is 0. This is also skipped when getting the + # n_units for the architecture, since it is mostly implemented for the + # output layer, which is part of the head and not of the backbone. dropout_shape = get_shaped_neuron_counts( - self.config['resnet_shape'], 0, 0, 1000, self.config['num_groups'] + 1 + shape=self.config['resnet_shape'], + in_feat=0, + out_feat=0, + max_neurons=self.config["max_dropout"], + layer_count=self.config['num_groups'] + 1, )[:-1] - dropout_shape = [ - dropout / 1000 * self.config["max_dropout"] for dropout in dropout_shape - ] - self.config.update( {"dropout_%d" % (i + 1): dropout for i, dropout in enumerate(dropout_shape)} ) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index aa46876fa..75c16905a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -94,7 +94,7 @@ def backward(ctx: typing.Any, def shake_get_alpha_beta(is_training: bool, is_cuda: bool ) -> typing.Tuple[torch.tensor, torch.tensor]: - if is_training: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) @@ -118,11 +118,11 @@ def shake_drop_get_bl( ) -> torch.tensor: pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) - if not is_training: - # Move to torch.randn(1) for reproducibility - bl = torch.tensor(1.0) if torch.randn(1) <= pl else torch.tensor(0.0) if is_training: - bl = torch.tensor(pl) + # Move to torch.rand(1) for reproducibility + bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0) + else: + bl = torch.as_tensor(pl) if is_cuda: bl = bl.cuda() diff --git a/autoPyTorch/pipeline/components/setup/network_head/fully_connected.py b/autoPyTorch/pipeline/components/setup/network_head/fully_connected.py index 4dcdd941f..99762bbcf 100644 ---
a/autoPyTorch/pipeline/components/setup/network_head/fully_connected.py +++ b/autoPyTorch/pipeline/components/setup/network_head/fully_connected.py @@ -59,23 +59,36 @@ def get_hyperparameter_search_space( ) -> ConfigurationSpace: cs = ConfigurationSpace() - min_num_layers, max_num_layers = num_layers.value_range - num_layers_hp = get_hyperparameter(num_layers, UniformIntegerHyperparameter) + min_num_layers: int = num_layers.value_range[0] # type: ignore + max_num_layers: int = num_layers.value_range[-1] # type: ignore + num_layers_is_constant = (min_num_layers == max_num_layers) + num_layers_hp = get_hyperparameter(num_layers, UniformIntegerHyperparameter) activation_hp = get_hyperparameter(activation, CategoricalHyperparameter) + cs.add_hyperparameter(num_layers_hp) - cs.add_hyperparameters([num_layers_hp, activation_hp]) - cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1)) + if not num_layers_is_constant: + cs.add_hyperparameter(activation_hp) + cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1)) + elif max_num_layers > 1: + # only add activation if we have more than 1 layer + cs.add_hyperparameter(activation_hp) - for i in range(1, int(max_num_layers)): - num_units_search_space = HyperparameterSearchSpace(hyperparameter=f"units_layer_{i}", - value_range=units_layer.value_range, - default_value=units_layer.default_value, - log=units_layer.log) + for i in range(1, max_num_layers + 1): + num_units_search_space = HyperparameterSearchSpace( + hyperparameter=f"units_layer_{i}", + value_range=units_layer.value_range, + default_value=units_layer.default_value, + log=units_layer.log, + ) num_units_hp = get_hyperparameter(num_units_search_space, UniformIntegerHyperparameter) cs.add_hyperparameter(num_units_hp) - if i >= int(min_num_layers): + if i >= min_num_layers and not num_layers_is_constant: + # In the case of a constant, the max and min number of layers are the same. + # So no condition is needed. If it is not a constant but a hyperparameter, + # then a condition has to be made so that it accounts for the value of the + # hyperparameter. 
cs.add_condition(CS.GreaterThanCondition(num_units_hp, num_layers_hp, i)) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py index 240cd4f14..3470b067a 100644 --- a/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py +++ b/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py @@ -24,7 +24,7 @@ def initialization(m: torch.nn.Module) -> None: torch.nn.Conv2d, torch.nn.Conv3d, torch.nn.Linear)): - torch.nn.init.xavier_normal(m.weight.data) + torch.nn.init.xavier_normal_(m.weight.data) if m.bias is not None and self.bias_strategy == 'Zero': torch.nn.init.constant_(m.bias.data, 0.0) return initialization diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index bc2d6a9fa..83693fdce 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -37,6 +37,7 @@ def __init__(self, batch_size: int = 64, self.batch_size = batch_size self.train_data_loader = None  # type: Optional[torch.utils.data.DataLoader] self.val_data_loader = None  # type: Optional[torch.utils.data.DataLoader] + self.test_data_loader: Optional[torch.utils.data.DataLoader] = None # We also support existing datasets! self.dataset = None @@ -69,7 +70,8 @@ def transform(self, X: np.ndarray) -> np.ndarray: np.ndarray: Transformed features """ X.update({'train_data_loader': self.train_data_loader, - 'val_data_loader': self.val_data_loader}) + 'val_data_loader': self.val_data_loader, + 'test_data_loader': self.test_data_loader}) return X def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: @@ -112,7 +114,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), - drop_last=X.get('drop_last', True), + drop_last=X.get('drop_last', False), collate_fn=custom_collate_fn, ) @@ -126,6 +128,11 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: collate_fn=custom_collate_fn, ) + if X.get('X_test', None) is not None: + self.test_data_loader = self.get_loader(X=X['X_test'], + y=X['y_test'], + batch_size=self.batch_size) + return self def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size: int = np.inf, @@ -137,6 +144,7 @@ def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size: dataset = BaseDataset( train_tensors=(X, y), + seed=self.random_state.get_state()[1][0], # This dataset is used for loading test data in a batched format train_transforms=self.test_transform, val_transforms=self.test_transform, diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index 65abcf3c2..461bd6add 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -7,7 +7,6 @@ import numpy as np -import sklearn.preprocessing from sklearn.base import ClassifierMixin import torch @@ -101,13 +100,8 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) if isinstance(self.dataset_properties['output_shape'], int): - proba = pred[:, :self.dataset_properties['output_shape']] -
normalizer = proba.sum(axis=1)[:, np.newaxis] - normalizer[normalizer == 0.0] = 1.0 - proba /= normalizer - - return proba - + # The final layer is always softmax now (`pred` already gives pseudo proba) + return pred else: raise ValueError("Expected output_shape to be integer, got {}," "Tabular Classification only supports 'binary' and 'multiclass' outputs" @@ -149,11 +143,6 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) y[batch_from:batch_to] = pred_prob.astype(np.float32) - # Neural networks might not be fit to produce a [0-1] output - # For instance, after small number of epochs. - y = np.clip(y, 0, 1) - y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') - return y def score(self, X: np.ndarray, y: np.ndarray, diff --git a/test/test_pipeline/components/setup/test_setup.py b/test/test_pipeline/components/setup/test_setup.py index 5d65ac14a..9d66953b2 100644 --- a/test/test_pipeline/components/setup/test_setup.py +++ b/test/test_pipeline/components/setup/test_setup.py @@ -483,12 +483,12 @@ def test_dropout(self, resnet_shape): backbone = resnet_backbone.build_backbone((100, 5)) dropout_probabilites = [resnet_backbone.config[key] for key in resnet_backbone.config if 'dropout_' in key] dropout_shape = get_shaped_neuron_counts( - resnet_shape, 0, 0, 1000, num_groups + 1 + shape=resnet_shape, + in_feat=0, + out_feat=0, + max_neurons=max_dropout, + layer_count=num_groups + 1, )[:-1] - - dropout_shape = [ - dropout / 1000 * max_dropout for dropout in dropout_shape - ] blocks_dropout = [] for block in backbone: if isinstance(block, torch.nn.Sequential):
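A few sketches to make the changes above concrete. First, the new n_threads argument, which is threaded through both public APIs: per the docstrings above, n_jobs controls how many processes are spawned while n_threads caps the threads each of them uses. A hypothetical construction (dataset and search arguments omitted):

    from autoPyTorch.api.tabular_classification import TabularClassificationTask

    # two worker processes, each limited to four threads
    api = TabularClassificationTask(seed=1, n_jobs=2, n_threads=4)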
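Second, the ShapedResNetBackbone hunk folds the old two-step dropout computation (build the shape against a ceiling of 1000, then rescale by max_dropout / 1000) into a single call that uses max_dropout as the ceiling directly. The two are arithmetically equivalent because the shaped values scale linearly with the ceiling; a hand-rolled funnel shape shows this (illustrative helper only, not the library's get_shaped_neuron_counts):

    # funnel: values decay linearly from `peak` towards zero across the groups
    def funnel(peak: float, layer_count: int) -> list:
        return [peak * (layer_count - i) / layer_count for i in range(layer_count)]

    max_dropout = 0.5
    old = [v / 1000 * max_dropout for v in funnel(1000.0, 4)]  # shape at 1000, then rescale
    new = funnel(max_dropout, 4)                               # shape at max_dropout directly
    assert all(abs(a - b) < 1e-12 for a, b in zip(old, new))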
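Third, the fully_connected.py head: when num_layers is a real hyperparameter, every units_layer_i beyond the minimum depth is guarded by a GreaterThanCondition, so it is only active in configurations deep enough to use it; when num_layers is constant, the conditions are dropped entirely. A reduced sketch of the conditional case (plain ConfigSpace, made-up value ranges):

    from ConfigSpace.conditions import GreaterThanCondition
    from ConfigSpace.configuration_space import ConfigurationSpace
    from ConfigSpace.hyperparameters import UniformIntegerHyperparameter

    cs = ConfigurationSpace()
    num_layers = UniformIntegerHyperparameter("num_layers", lower=1, upper=4, default_value=2)
    units_2 = UniformIntegerHyperparameter("units_layer_2", lower=16, upper=512, log=True)
    cs.add_hyperparameters([num_layers, units_2])
    # units_layer_2 is only sampled when the drawn num_layers exceeds 2
    cs.add_condition(GreaterThanCondition(units_2, num_layers, 2))
    print(cs.sample_configuration())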
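Finally, the base_data_loader hunk builds a test loader whenever the fit dictionary carries an 'X_test' entry, and transform() now publishes it alongside the train and validation loaders. A sketch of the consumer side (hypothetical; a stand-in dict replaces the real fit dictionary returned by transform()):

    X = {'test_data_loader': None}            # stand-in for the fit dictionary
    test_loader = X.get('test_data_loader')   # stays None when fit() saw no 'X_test'
    if test_loader is not None:               # e.g. iterate once per epoch for a test metric
        n_test_batches = sum(1 for _ in test_loader)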