[FIX] Minor Fixes (automl#306)
* added the minor fixes from reg_cocktail

* fixes to dropout, removing unnecessary lines

* fix in data loader

* Apply suggestions from code review

Co-authored-by: nabenabe0928 <[email protected]>

* fix tests with dropout

* Fix tests

Co-authored-by: nabenabe0928 <[email protected]>
ravinkohli and nabenabe0928 authored Nov 9, 2021
1 parent a11caf4 commit 4a29852
Showing 10 changed files with 101 additions and 75 deletions.
60 changes: 35 additions & 25 deletions autoPyTorch/api/base_task.py
@@ -36,7 +36,7 @@
STRING_TO_TASK_TYPES,
)
from autoPyTorch.data.base_validator import BaseInputValidator
from autoPyTorch.datasets.base_dataset import BaseDataset
from autoPyTorch.datasets.base_dataset import BaseDataset, BaseDatasetPropertiesType
from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
@@ -105,6 +105,8 @@ class BaseTask:
Args:
seed (int), (default=1): seed to be used for reproducibility.
n_jobs (int), (default=1): number of consecutive processes to spawn.
n_threads (int), (default=1):
number of threads to use for each process.
logging_config (Optional[Dict]): specifies configuration
for logging, if None, it is loaded from the logging.yaml
ensemble_size (int), (default=50): Number of models added to the ensemble built by
@@ -133,6 +135,7 @@ def __init__(
self,
seed: int = 1,
n_jobs: int = 1,
n_threads: int = 1,
logging_config: Optional[Dict] = None,
ensemble_size: int = 50,
ensemble_nbest: int = 50,
@@ -151,6 +154,7 @@ def __init__(
) -> None:
self.seed = seed
self.n_jobs = n_jobs
self.n_threads = n_threads
self.ensemble_size = ensemble_size
self.ensemble_nbest = ensemble_nbest
self.max_models_on_disc = max_models_on_disc
@@ -1064,6 +1068,28 @@ def _search(

return self

def _get_fit_dictionary(
self,
dataset_properties: Dict[str, BaseDatasetPropertiesType],
dataset: BaseDataset,
split_id: int = 0
) -> Dict[str, Any]:
X_test = dataset.test_tensors[0].copy() if dataset.test_tensors is not None else None
y_test = dataset.test_tensors[1].copy() if dataset.test_tensors is not None else None
X: Dict[str, Any] = dict({'dataset_properties': dataset_properties,
'backend': self._backend,
'X_train': dataset.train_tensors[0].copy(),
'y_train': dataset.train_tensors[1].copy(),
'X_test': X_test,
'y_test': y_test,
'train_indices': dataset.splits[split_id][0],
'val_indices': dataset.splits[split_id][1],
'split_id': split_id,
'num_run': self._backend.get_next_num_run(),
})
X.update(self.pipeline_options)
return X

def refit(
self,
dataset: BaseDataset,
@@ -1107,18 +1133,6 @@ def refit(
dataset_properties = dataset.get_dataset_properties(dataset_requirements)
self._backend.save_datamanager(dataset)

X: Dict[str, Any] = dict({'dataset_properties': dataset_properties,
'backend': self._backend,
'X_train': dataset.train_tensors[0],
'y_train': dataset.train_tensors[1],
'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None,
'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None,
'train_indices': dataset.splits[split_id][0],
'val_indices': dataset.splits[split_id][1],
'split_id': split_id,
'num_run': self._backend.get_next_num_run(),
})
X.update(self.pipeline_options)
if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None:
self._load_models()

@@ -1134,6 +1148,10 @@
# try to fit the model. If it fails, shuffle the data. This
# could alleviate the problem in algorithms that depend on
# the ordering of the data.
X = self._get_fit_dictionary(
dataset_properties=dataset_properties,
dataset=dataset,
split_id=split_id)
fit_and_suppress_warnings(self._logger, model, X, y=None)

self._clean_logger()
@@ -1187,18 +1205,10 @@ def fit(self,
pipeline.set_hyperparameters(pipeline_config)

# initialise fit dictionary
X: Dict[str, Any] = dict({'dataset_properties': dataset_properties,
'backend': self._backend,
'X_train': dataset.train_tensors[0],
'y_train': dataset.train_tensors[1],
'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None,
'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None,
'train_indices': dataset.splits[split_id][0],
'val_indices': dataset.splits[split_id][1],
'split_id': split_id,
'num_run': self._backend.get_next_num_run(),
})
X.update(self.pipeline_options)
X = self._get_fit_dictionary(
dataset_properties=dataset_properties,
dataset=dataset,
split_id=split_id)

fit_and_suppress_warnings(self._logger, pipeline, X, y=None)

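The new `_get_fit_dictionary` centralises the fit-dictionary construction that `refit` and `fit` previously duplicated, and unlike the old inline code it copies the train and test tensors before handing them to the pipeline. One plausible reason for those copies (an assumption, not stated in the commit) is to keep in-place preprocessing from mutating the caller's data; a minimal NumPy illustration of that failure mode:

import numpy as np

def impute_inplace(X: np.ndarray) -> np.ndarray:
    # hypothetical downstream step that fills missing values in place
    X[np.isnan(X)] = 0.0
    return X

X_train = np.array([[1.0, np.nan], [3.0, 4.0]])

impute_inplace(X_train.copy())     # the original array is untouched
print(np.isnan(X_train).any())     # True: the NaN is still there

impute_inplace(X_train)            # without the copy, the caller's data is mutated
print(np.isnan(X_train).any())     # False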
10 changes: 6 additions & 4 deletions autoPyTorch/api/tabular_classification.py
@@ -26,10 +26,10 @@ class TabularClassificationTask(BaseTask):
"""
Tabular Classification API to the pipelines.
Args:
seed (int):
seed to be used for reproducibility.
n_jobs (int), (default=1):
number of consecutive processes to spawn.
seed (int), (default=1): seed to be used for reproducibility.
n_jobs (int), (default=1): number of consecutive processes to spawn.
n_threads (int), (default=1):
number of threads to use for each process.
logging_config (Optional[Dict]):
specifies configuration for logging, if None, it is loaded from the logging.yaml
ensemble_size (int), (default=50):
@@ -64,6 +64,7 @@ def __init__(
self,
seed: int = 1,
n_jobs: int = 1,
n_threads: int = 1,
logging_config: Optional[Dict] = None,
ensemble_size: int = 50,
ensemble_nbest: int = 50,
@@ -82,6 +83,7 @@
super().__init__(
seed=seed,
n_jobs=n_jobs,
n_threads=n_threads,
logging_config=logging_config,
ensemble_size=ensemble_size,
ensemble_nbest=ensemble_nbest,
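The new `n_threads` argument is forwarded from the task constructors to `BaseTask`; per the docstring it sets the number of threads available to each evaluation process, while `n_jobs` sets the number of processes. A hedged usage sketch (the commented-out `search()` call and its arguments are illustrative assumptions, not part of this diff):

from autoPyTorch.api.tabular_classification import TabularClassificationTask

api = TabularClassificationTask(
    seed=1,
    n_jobs=2,      # two parallel evaluation processes
    n_threads=4,   # threads available to each of those processes
)
# api.search(X_train=X_train, y_train=y_train, optimize_metric='accuracy')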
6 changes: 5 additions & 1 deletion autoPyTorch/api/tabular_regression.py
@@ -26,8 +26,10 @@ class TabularRegressionTask(BaseTask):
"""
Tabular Regression API to the pipelines.
Args:
seed (int): seed to be used for reproducibility.
seed (int), (default=1): seed to be used for reproducibility.
n_jobs (int), (default=1): number of consecutive processes to spawn.
n_threads (int), (default=1):
number of threads to use for each process.
logging_config (Optional[Dict]): specifies configuration
for logging, if None, it is loaded from the logging.yaml
ensemble_size (int), (default=50): Number of models added to the ensemble built by
@@ -56,6 +58,7 @@ def __init__(
self,
seed: int = 1,
n_jobs: int = 1,
n_threads: int = 1,
logging_config: Optional[Dict] = None,
ensemble_size: int = 50,
ensemble_nbest: int = 50,
@@ -74,6 +77,7 @@
super().__init__(
seed=seed,
n_jobs=n_jobs,
n_threads=n_threads,
logging_config=logging_config,
ensemble_size=ensemble_size,
ensemble_nbest=ensemble_nbest,
@@ -39,19 +39,19 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> torch.nn.Sequential:
self.config.update(
{"num_units_%d" % (i): num for i, num in enumerate(neuron_counts)}
)
# we are skipping the last layer, as the function get_shaped_neuron_counts
# is built for getting neuron counts, so it will add the out_features to
# the last layer. However, in dropout we dont want to have that, we just
# want to use the shape and not worry about the output.
if self.config['use_dropout']:
# the last dropout ("neuron") value is skipped since it will be equal
# to output_feat, which is 0. This is also skipped when getting the
# n_units for the architecture, since, it is mostly implemented for the
# output layer, which is part of the head and not of the backbone.
dropout_shape = get_shaped_neuron_counts(
self.config['resnet_shape'], 0, 0, 1000, self.config['num_groups'] + 1
shape=self.config['resnet_shape'],
in_feat=0,
out_feat=0,
max_neurons=self.config["max_dropout"],
layer_count=self.config['num_groups'] + 1,
)[:-1]

dropout_shape = [
dropout / 1000 * self.config["max_dropout"] for dropout in dropout_shape
]

self.config.update(
{"dropout_%d" % (i + 1): dropout for i, dropout in enumerate(dropout_shape)}
)
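Before this change, the per-group dropout values were obtained by asking `get_shaped_neuron_counts` for counts up to 1000 and then rescaling by `max_dropout / 1000`; now `max_dropout` is passed directly as `max_neurons`, so the helper yields the dropout rates themselves. A simplified stand-in for the shape helper (the linear "funnel" below is a hypothetical illustration, not the real implementation) shows how the per-group values end up in the config:

from typing import List

def funnel_dropout_shape(max_dropout: float, num_groups: int) -> List[float]:
    # hypothetical linearly decreasing "funnel": the first group gets the full
    # max_dropout, later groups get proportionally smaller dropout rates
    return [max_dropout * (num_groups - i) / num_groups for i in range(num_groups)]

max_dropout = 0.5
dropout_shape = funnel_dropout_shape(max_dropout, num_groups=3)
config_update = {"dropout_%d" % (i + 1): p for i, p in enumerate(dropout_shape)}
print(config_update)    # {'dropout_1': 0.5, 'dropout_2': 0.333..., 'dropout_3': 0.166...}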
10 changes: 5 additions & 5 deletions autoPyTorch/pipeline/components/setup/network_backbone/utils.py
@@ -94,7 +94,7 @@ def backward(ctx: typing.Any,

def shake_get_alpha_beta(is_training: bool, is_cuda: bool
) -> typing.Tuple[torch.tensor, torch.tensor]:
if is_training:
if not is_training:
result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5]))
return result if not is_cuda else (result[0].cuda(), result[1].cuda())

@@ -118,11 +118,11 @@ def shake_drop_get_bl(
) -> torch.tensor:
pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)

if not is_training:
# Move to torch.randn(1) for reproducibility
bl = torch.tensor(1.0) if torch.randn(1) <= pl else torch.tensor(0.0)
if is_training:
bl = torch.tensor(pl)
# Move to torch.rand(1) for reproducibility
bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0)
else:
bl = torch.as_tensor(pl)

if is_cuda:
bl = bl.cuda()
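The two hunks above fix inverted training/evaluation branches: shake-shake's alpha and beta are now sampled only during training (and pinned to 0.5 otherwise), and the ShakeDrop gate `bl` is a Bernoulli sample with keep-probability `pl` during training and its expectation at evaluation time. The fixed gate, assembled from the hunk into a self-contained sketch:

import torch

def shake_drop_get_bl(block_index: int,
                      min_prob_no_shake: float,
                      num_blocks: int,
                      is_training: bool,
                      is_cuda: bool) -> torch.Tensor:
    # keep-probability decays linearly with block depth
    pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)

    if is_training:
        # Bernoulli gate: keep this block's residual branch with probability pl
        bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0)
    else:
        # at evaluation time, use the expectation instead of sampling
        bl = torch.as_tensor(pl)

    if is_cuda:
        bl = bl.cuda()
    return bl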
@@ -59,23 +59,36 @@ def get_hyperparameter_search_space(
) -> ConfigurationSpace:
cs = ConfigurationSpace()

min_num_layers, max_num_layers = num_layers.value_range
num_layers_hp = get_hyperparameter(num_layers, UniformIntegerHyperparameter)
min_num_layers: int = num_layers.value_range[0] # type: ignore
max_num_layers: int = num_layers.value_range[-1] # type: ignore
num_layers_is_constant = (min_num_layers == max_num_layers)

num_layers_hp = get_hyperparameter(num_layers, UniformIntegerHyperparameter)
activation_hp = get_hyperparameter(activation, CategoricalHyperparameter)
cs.add_hyperparameter(num_layers_hp)

cs.add_hyperparameters([num_layers_hp, activation_hp])
cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1))
if not num_layers_is_constant:
cs.add_hyperparameter(activation_hp)
cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1))
elif max_num_layers > 1:
# only add activation if we have more than 1 layer
cs.add_hyperparameter(activation_hp)

for i in range(1, int(max_num_layers)):
num_units_search_space = HyperparameterSearchSpace(hyperparameter=f"units_layer_{i}",
value_range=units_layer.value_range,
default_value=units_layer.default_value,
log=units_layer.log)
for i in range(1, max_num_layers + 1):
num_units_search_space = HyperparameterSearchSpace(
hyperparameter=f"units_layer_{i}",
value_range=units_layer.value_range,
default_value=units_layer.default_value,
log=units_layer.log,
)
num_units_hp = get_hyperparameter(num_units_search_space, UniformIntegerHyperparameter)
cs.add_hyperparameter(num_units_hp)

if i >= int(min_num_layers):
if i >= min_num_layers and not num_layers_is_constant:
# In the case of a constant, the max and min number of layers are the same.
# So no condition is needed. If it is not a constant but a hyperparameter,
# then a condition has to be made so that it accounts for the value of the
# hyperparameter.
cs.add_condition(CS.GreaterThanCondition(num_units_hp, num_layers_hp, i))

return cs
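The reworked search space only attaches `GreaterThanCondition` objects when `num_layers` is a genuine hyperparameter; as the new comment explains, when the value range collapses to a single value there is nothing to condition on. A stripped-down sketch of the same pattern, with hypothetical ranges, choices, and slightly simplified per-layer indexing:

from typing import Optional

import ConfigSpace as CS
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter,
    UniformIntegerHyperparameter,
)

def head_space(min_layers: int, max_layers: int) -> CS.ConfigurationSpace:
    cs = CS.ConfigurationSpace()
    constant_depth = (min_layers == max_layers)
    num_layers: Optional[UniformIntegerHyperparameter] = None

    activation = CategoricalHyperparameter("activation", choices=["relu", "tanh"])

    if not constant_depth:
        num_layers = UniformIntegerHyperparameter("num_layers",
                                                  lower=min_layers, upper=max_layers)
        cs.add_hyperparameters([num_layers, activation])
        # activation only matters once there is more than one layer
        cs.add_condition(CS.GreaterThanCondition(activation, num_layers, 1))
    elif max_layers > 1:
        # depth is fixed: activation is unconditionally part of the space
        cs.add_hyperparameter(activation)

    # per-layer widths (indexing simplified relative to the hunk above)
    for i in range(1, max_layers + 1):
        units = UniformIntegerHyperparameter(f"units_layer_{i}",
                                             lower=16, upper=512, log=True)
        cs.add_hyperparameter(units)
        if i > min_layers and not constant_depth:
            # the width of layer i is only active when num_layers can reach i
            cs.add_condition(CS.GreaterThanCondition(units, num_layers, i - 1))

    return cs

print(head_space(2, 2))   # constant depth: no conditions added
print(head_space(1, 4))   # conditional activation and per-layer widths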
@@ -24,7 +24,7 @@ def initialization(m: torch.nn.Module) -> None:
torch.nn.Conv2d,
torch.nn.Conv3d,
torch.nn.Linear)):
torch.nn.init.xavier_uniform_(m.weight.data)
torch.nn.init.xavier_normal(m.weight.data)
if m.bias is not None and self.bias_strategy == 'Zero':
torch.nn.init.constant_(m.bias.data, 0.0)
return initialization
@@ -37,6 +37,7 @@ def __init__(self, batch_size: int = 64,
self.batch_size = batch_size
self.train_data_loader = None # type: Optional[torch.utils.data.DataLoader]
self.val_data_loader = None # type: Optional[torch.utils.data.DataLoader]
self.test_data_loader: Optional[torch.utils.data.DataLoader] = None

# We also support existing datasets!
self.dataset = None
@@ -69,7 +70,8 @@ def transform(self, X: np.ndarray) -> np.ndarray:
np.ndarray: Transformed features
"""
X.update({'train_data_loader': self.train_data_loader,
'val_data_loader': self.val_data_loader})
'val_data_loader': self.val_data_loader,
'test_data_loader': self.test_data_loader})
return X

def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
@@ -112,7 +114,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
shuffle=True,
num_workers=X.get('num_workers', 0),
pin_memory=X.get('pin_memory', True),
drop_last=X.get('drop_last', True),
drop_last=X.get('drop_last', False),
collate_fn=custom_collate_fn,
)

@@ -126,6 +128,11 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
collate_fn=custom_collate_fn,
)

if X.get('X_test', None) is not None:
self.test_data_loader = self.get_loader(X=X['X_test'],
y=X['y_test'],
batch_size=self.batch_size)

return self

def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size: int = np.inf,
@@ -137,6 +144,7 @@ def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size:

dataset = BaseDataset(
train_tensors=(X, y),
seed=self.random_state.get_state()[1][0],
# This dataset is used for loading test data in a batched format
train_transforms=self.test_transform,
val_transforms=self.test_transform,
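The hunks above register a `test_data_loader` whenever `X_test` is present, expose it through `transform`, seed the `BaseDataset` built inside `get_loader`, and flip the `drop_last` default from True to False. The last point is easy to miss: with `drop_last=True` the final incomplete batch is silently discarded. A standalone PyTorch illustration (toy tensors, not autoPyTorch code):

import torch
from torch.utils.data import DataLoader, TensorDataset

# 10 samples with batch_size=4: the last batch holds only 2 samples
data = TensorDataset(torch.arange(10).float().unsqueeze(1), torch.zeros(10))

seen_with_drop = sum(len(xb) for xb, _ in DataLoader(data, batch_size=4, drop_last=True))
seen_without_drop = sum(len(xb) for xb, _ in DataLoader(data, batch_size=4, drop_last=False))

print(seen_with_drop, seen_without_drop)   # 8 10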
15 changes: 2 additions & 13 deletions autoPyTorch/pipeline/tabular_classification.py
@@ -7,7 +7,6 @@

import numpy as np

import sklearn.preprocessing
from sklearn.base import ClassifierMixin

import torch
@@ -101,13 +100,8 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray:
loader = self.named_steps['data_loader'].get_loader(X=X)
pred = self.named_steps['network'].predict(loader)
if isinstance(self.dataset_properties['output_shape'], int):
proba = pred[:, :self.dataset_properties['output_shape']]
normalizer = proba.sum(axis=1)[:, np.newaxis]
normalizer[normalizer == 0.0] = 1.0
proba /= normalizer

return proba

# The final layer is always softmax now (`pred` already gives pseudo proba)
return pred
else:
raise ValueError("Expected output_shape to be integer, got {},"
"Tabular Classification only supports 'binary' and 'multiclass' outputs"
@@ -149,11 +143,6 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray:
pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None)
y[batch_from:batch_to] = pred_prob.astype(np.float32)

# Neural networks might not be fit to produce a [0-1] output
# For instance, after small number of epochs.
y = np.clip(y, 0, 1)
y = sklearn.preprocessing.normalize(y, axis=1, norm='l1')

return y

def score(self, X: np.ndarray, y: np.ndarray,
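The removed clipping and L1 normalisation in `predict_proba` existed to coerce arbitrary network outputs onto the probability simplex; as the comment in `_predict_proba` notes, the final layer is always a softmax now, so `pred` already sums to one per row and the post-processing is redundant. A quick standalone check in plain PyTorch:

import torch

logits = torch.randn(5, 3)                # raw network outputs for 5 samples, 3 classes
proba = torch.softmax(logits, dim=1)      # what a softmax output layer produces

print(proba.sum(dim=1))                                   # ~1.0 for every row
print(torch.allclose(proba.sum(dim=1), torch.ones(5)))    # True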
10 changes: 5 additions & 5 deletions test/test_pipeline/components/setup/test_setup.py
@@ -483,12 +483,12 @@ def test_dropout(self, resnet_shape):
backbone = resnet_backbone.build_backbone((100, 5))
dropout_probabilites = [resnet_backbone.config[key] for key in resnet_backbone.config if 'dropout_' in key]
dropout_shape = get_shaped_neuron_counts(
resnet_shape, 0, 0, 1000, num_groups + 1
shape=resnet_shape,
in_feat=0,
out_feat=0,
max_neurons=max_dropout,
layer_count=num_groups + 1,
)[:-1]

dropout_shape = [
dropout / 1000 * max_dropout for dropout in dropout_shape
]
blocks_dropout = []
for block in backbone:
if isinstance(block, torch.nn.Sequential):
