Reduce run time of the test (automl#205)

* In progress, changing tests

* Reduce time for tests

* Fix flake in tests

* Patch train in other tests also

* Address comments from shuhei and fransisco:

* Move base training to pytest

* Fix flake in tests

* forgot to pass n_samples

* stupid error

* Address comments from shuhei, remove hardcoding and fix bug in dummy eval function

* Skip ensemble test for python >=3.7 and introduce random state for feature processors

* fix flake

* Remove example workflow

* Remove  from __init__ in feature preprocessing
ravinkohli authored May 17, 2021
1 parent ee07c7e commit 4493270
Showing 18 changed files with 302 additions and 176 deletions.
39 changes: 0 additions & 39 deletions .github/workflows/examples.yml

This file was deleted.

@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
-                 ) -> None:
+                 random_state: Optional[np.random.RandomState] = None
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

         self.add_fit_requirements([
             FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])
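
Every feature preprocessor below receives the same two-part change: the int option is dropped from the accepted random_state types, and the value is handed to the base class instead of being stored locally. A minimal sketch of the resulting pattern, using a hypothetical MyPreprocessor subclass (base-class import path assumed from the repository layout):

from typing import Optional

import numpy as np

# Import path assumed; adjust to wherever the base component lives.
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.base_feature_preprocessor import (
    autoPyTorchFeaturePreprocessingComponent
)


class MyPreprocessor(autoPyTorchFeaturePreprocessingComponent):
    def __init__(self, n_components: int = 10,
                 random_state: Optional[np.random.RandomState] = None):
        self.n_components = n_components
        # The base class now stores random_state (or a seeded fallback),
        # so the subclass no longer assigns self.random_state itself.
        super().__init__(random_state=random_state)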
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
-                 ) -> None:
+                 random_state: Optional[np.random.RandomState] = None
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -19,13 +19,12 @@
 class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, degree: int = 2, interaction_only: bool = False,
                  include_bias: bool = False,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.degree = degree
         self.interaction_only = interaction_only
         self.include_bias = include_bias

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures(
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -17,11 +17,10 @@

 class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, standardize: bool = True,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.standardize = standardize

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -20,12 +20,11 @@
 class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
-                 ) -> None:
+                 random_state: Optional[np.random.RandomState] = None
+                 ):
         self.n_components = n_components
         self.gamma = gamma
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

@@ -1,5 +1,5 @@
 from math import floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -18,11 +18,10 @@

 class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, target_dim: int = 128,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.target_dim = target_dim

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

@@ -1,4 +1,8 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

+import numpy as np
+
+from sklearn.utils import check_random_state
+
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@
 class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']

-    def __init__(self) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None):
+        if random_state is None:
+            # Trainer components need a random state for
+            # sampling -- for example in MixUp training
+            self.random_state = check_random_state(1)
+        else:
+            self.random_state = random_state
         super().__init__()

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
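
The new fallback relies on sklearn.utils.check_random_state, which converts an int seed into an np.random.RandomState and returns an existing generator unchanged. A small self-contained sketch of that behaviour:

import numpy as np
from sklearn.utils import check_random_state

rs_seeded = check_random_state(1)  # int seed -> np.random.RandomState(1)
rs_same = check_random_state(np.random.RandomState(42))  # generator passed through as-is

assert isinstance(rs_seeded, np.random.RandomState)
print(rs_seeded.randint(0, 10, size=3))  # deterministic: same values on every run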
36 changes: 22 additions & 14 deletions test/conftest.py
@@ -25,6 +25,9 @@
 from autoPyTorch.utils.pipeline import get_dataset_requirements

+N_SAMPLES = 200
+
+
 @pytest.fixture(scope="session")
 def callattr_ahead_of_alltests(request):
     """
@@ -191,7 +194,7 @@ def session_run_at_end():
 def get_tabular_data(task):
     if task == "classification_numerical_only":
         X, y = make_classification(
-            n_samples=200,
+            n_samples=N_SAMPLES,
             n_features=4,
             n_informative=3,
             n_redundant=1,
@@ -207,18 +210,18 @@ def get_tabular_data(task):
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
         categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
         X = X[categorical_columns]
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "classification_numerical_and_categorical":
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "regression_numerical_only":
-        X, y = make_regression(n_samples=200,
+        X, y = make_regression(n_samples=N_SAMPLES,
                                n_features=4,
                                n_informative=3,
                                n_targets=1,
@@ -240,8 +243,8 @@
         else:
             X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())

@@ -256,8 +259,8 @@
         else:
             X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
     elif task == 'iris':
@@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'num_run': np.random.randint(50),
         'device': 'cpu',
         'budget_type': 'epochs',
-        'epochs': 100,
+        'epochs': 5,
         'torch_num_threads': 1,
         'early_stopping': 10,
         'working_dir': '/tmp',
@@ -326,7 +329,7 @@ def dataset(request):
 @pytest.fixture
 def dataset_traditional_classifier_num_only():
     X, y = make_classification(
-        n_samples=200,
+        n_samples=N_SAMPLES,
         n_features=4,
         n_informative=3,
         n_redundant=1,
@@ -344,15 +347,15 @@ def dataset_traditional_classifier_categorical_only():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
     X = X[categorical_columns]
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y


 @pytest.fixture
 def dataset_traditional_classifier_num_categorical():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     y = y.astype(np.int)
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y


@@ -456,3 +459,8 @@ def loss_mse():
 @pytest.fixture
 def loss_details(request):
     return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def n_samples():
+    return N_SAMPLES
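
With this fixture, tests can request the sample count instead of hardcoding 200. A hypothetical test using it (the test name and dataset are illustrative only):

from sklearn.datasets import make_classification


def test_classification_data_shape(n_samples):
    # n_samples resolves to N_SAMPLES (200) via the fixture above, so the
    # suite-wide dataset size can be tuned in a single place.
    X, y = make_classification(n_samples=n_samples, n_features=4, random_state=1)
    assert X.shape == (n_samples, 4)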