Skip to content

Commit

Permalink
[Bug] Fix random halt problems on traditional pipelines (automl#147)
Browse files Browse the repository at this point in the history
* [feat] Fix random halt problems on traditional pipelines

* Documentation update

* Fix flake

* Flake due to kernel pca errors
  • Loading branch information
franchuterivera authored Mar 29, 2021
1 parent 7bcde56 commit 68fc77f
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 2 deletions.
51 changes: 49 additions & 2 deletions autoPyTorch/evaluation/abstract_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@


class MyTraditionalTabularClassificationPipeline(BaseEstimator):
"""
A wrapper class that holds a pipeline for traditional classification.
Estimators like CatBoost and Random Forest are considered traditional machine
learning models and are fitted before neural architecture search.
This class is an interface to fit a pipeline containing a traditional machine
learning model, and is the final object that is stored for inference.
Attributes:
dataset_properties (Dict[str, Any]):
A dictionary containing dataset specific information
random_state (Optional[Union[int, np.random.RandomState]]):
Object that contains a seed and allows for reproducible results
init_params (Optional[Dict]):
An optional dictionary that is passed to the pipeline's steps. It serves
a similar function to kwargs
"""
def __init__(self, config: str,
dataset_properties: Dict[str, Any],
random_state: Optional[Union[int, np.random.RandomState]] = None,
Expand Down Expand Up @@ -98,6 +115,21 @@ def get_default_pipeline_options() -> Dict[str, Any]:


class DummyClassificationPipeline(DummyClassifier):
"""
A wrapper class that holds a pipeline for dummy classification.
A wrapper over scikit-learn's DummyClassifier. This estimator is considered the
worst performing model. In case of failure, at least this model will be fitted.
Attributes:
dataset_properties (Dict[str, Any]):
A dictionary containing dataset specific information
random_state (Optional[Union[int, np.random.RandomState]]):
Object that contains a seed and allows for reproducible results
init_params (Optional[Dict]):
An optional dictionary that is passed to the pipeline's steps. It serves
a similar function to kwargs
"""
def __init__(self, config: Configuration,
random_state: Optional[Union[int, np.random.RandomState]] = None,
init_params: Optional[Dict] = None
Expand Down Expand Up @@ -148,6 +180,21 @@ def get_default_pipeline_options() -> Dict[str, Any]:


class DummyRegressionPipeline(DummyRegressor):
"""
A wrapper class that holds a pipeline for dummy regression.
A wrapper over scikit-learn's DummyRegressor. This estimator is considered the
worst performing model. In case of failure, at least this model will be fitted.
Attributes:
dataset_properties (Dict[str, Any]):
A dictionary containing dataset specific information
random_state (Optional[Union[int, np.random.RandomState]]):
Object that contains a seed and allows for reproducible results
init_params (Optional[Dict]):
An optional dictionary that is passed to the pipeline's steps. It serves
a similar function to kwargs
"""
def __init__(self, config: Configuration,
random_state: Optional[Union[int, np.random.RandomState]] = None,
init_params: Optional[Dict] = None) -> None:
Expand Down Expand Up @@ -351,7 +398,7 @@ def _get_pipeline(self) -> BaseEstimator:
if isinstance(self.configuration, int):
pipeline = self.pipeline_class(config=self.configuration,
random_state=np.random.RandomState(self.seed),
init_params=self.fit_dictionary)
init_params=self._init_params)
elif isinstance(self.configuration, Configuration):
pipeline = self.pipeline_class(config=self.configuration,
dataset_properties=self.dataset_properties,
Expand All @@ -364,7 +411,7 @@ def _get_pipeline(self) -> BaseEstimator:
pipeline = self.pipeline_class(config=self.configuration,
dataset_properties=self.dataset_properties,
random_state=np.random.RandomState(self.seed),
init_params=self.fit_dictionary)
init_params=self._init_params)
else:
raise ValueError("Invalid configuration entered")
return pipeline
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import flaky

import numpy as np

import pytest
Expand Down Expand Up @@ -51,6 +53,7 @@ def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor):
transformed = column_transformer.transform(X['X_train'])
assert isinstance(transformed, np.ndarray)

@flaky.flaky(max_runs=3)
def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor):
"""
This test ensures that a tabular classification
Expand Down

0 comments on commit 68fc77f

Please sign in to comment.