Skip to content

Commit

Permalink
[feature] Greedy Portfolio (automl#200)
Browse files Browse the repository at this point in the history
* initial configurations added

* In progress, adding flag in search function

* Adds documentation, example and fixes setup.py

* Address comments from shuhei, change run_greedy to portfolio_selection

* address comments from Francisco, move portfolio to configs

* Address comments from fransisco, add tests for greedy portfolio and tests

* fix flake tests

* Simplify portfolio selection

* Update autoPyTorch/optimizer/smbo.py

Co-authored-by: Francisco Rivera Valverde <[email protected]>

* Address comments from fransisco, path exception handling and test

* fix flake

* Address comments from shuhei

* fix bug in setup.py

* fix tests in base trainer evaluate, increase n samples and add seed

* fix tests in base trainer evaluate, increase n samples (fix)

Co-authored-by: Francisco Rivera Valverde <[email protected]>
  • Loading branch information
ravinkohli and franchuterivera authored May 31, 2021
1 parent 097cb99 commit 1e08fc9
Show file tree
Hide file tree
Showing 16 changed files with 853 additions and 35 deletions.
17 changes: 15 additions & 2 deletions autoPyTorch/api/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class BaseTask:
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -697,6 +700,7 @@ def _search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: Optional[str] = None
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -767,7 +771,15 @@ def _search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
portfolio_selection (str), (default=None):
This argument controls the initial configurations that
AutoPyTorch uses to warm start SMAC for hyperparameter
optimization. By default, no warm-starting happens.
The user can provide a path to a json file containing
configurations, similar to the default greedy portfolio file.
Additionally, the keyword 'greedy' is supported,
which would use the default portfolio from
`AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -955,7 +967,8 @@ def _search(
# We do not increase the num_run here, this is something
# smac does internally
start_num_run=self._backend.get_next_num_run(peek=True),
search_space_updates=self.search_space_updates
search_space_updates=self.search_space_updates,
portfolio_selection=portfolio_selection,
)
try:
run_history, self.trajectory, budget_type = \
Expand Down
47 changes: 30 additions & 17 deletions autoPyTorch/api/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ class TabularClassificationTask(BaseTask):
If None, all possible components are used. Otherwise
specifies set of components not to use. Incompatible
with include components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""
def __init__(
self,
Expand Down Expand Up @@ -119,6 +122,7 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: Optional[str] = None,
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand All @@ -131,21 +135,21 @@ def search(
A pair of features (X_train) and targets (y_train) used to fit a
pipeline. Additionally, a holdout of these pairs (X_test, y_test) can
be provided to track the generalization performance of each stage.
optimize_metric (str): name of the metric that is used to
evaluate a pipeline.
optimize_metric (str):
name of the metric that is used to evaluate a pipeline.
budget_type (Optional[str]):
Type of budget to be used when fitting the pipeline.
Either 'epochs' or 'runtime'. If not provided, uses
the default in the pipeline config ('epochs')
budget (Optional[float]):
Budget to fit a single run of the pipeline. If not
provided, uses the default in the pipeline config
total_walltime_limit (int), (default=100): Time limit
in seconds for the search of appropriate models.
total_walltime_limit (int), (default=100):
Time limit in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit_secs (int), (default=None): Time limit
for a single call to the machine learning model.
func_eval_time_limit_secs (int), (default=None):
Time limit for a single call to the machine learning model.
Model fitting will be terminated if the machine
learning algorithm runs over the time limit. Set
this value high enough so that typical machine
Expand All @@ -162,32 +166,40 @@ def search(
feature by turning this flag to False. All machine learning
algorithms that are fitted during search() are considered for
ensemble building.
memory_limit (Optional[int]), (default=4096): Memory
limit in MB for the machine learning algorithm. autopytorch
memory_limit (Optional[int]), (default=4096):
Memory limit in MB for the machine learning algorithm. autopytorch
will stop fitting the machine learning algorithm if it tries
to allocate more than memory_limit MB. If None is provided,
no memory limit is set. In case of multi-processing, memory_limit
will be per job. This memory limit also applies to the ensemble
creation process.
smac_scenario_args (Optional[Dict]): Additional arguments inserted
into the scenario of SMAC. See the
smac_scenario_args (Optional[Dict]):
Additional arguments inserted into the scenario of SMAC. See the
[SMAC documentation](https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
get_smac_object_callback (Optional[Callable]): Callback function
to create an object of class
get_smac_object_callback (Optional[Callable]):
Callback function to create an object of class
[smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
The function must accept the arguments scenario_dict,
instances, num_params, runhistory, seed and ta. This is
an advanced feature. Use only if you are familiar with
[SMAC](https://automl.github.io/SMAC3/master/index.html).
all_supported_metrics (bool), (default=True): if True, all
metrics supporting current task will be calculated
all_supported_metrics (bool), (default=True):
if True, all metrics supporting current task will be calculated
for each pipeline and results will be available via cv_results
precision (int), (default=32): Numeric precision used when loading
ensemble data. Can be either '16', '32' or '64'.
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
load_models (bool), (default=True):
Whether to load the models after fitting AutoPyTorch.
portfolio_selection (str), (default=None):
This argument controls the initial configurations that
AutoPyTorch uses to warm start SMAC for hyperparameter
optimization. By default, no warm-starting happens.
The user can provide a path to a json file containing
configurations, similar to the default greedy portfolio file.
Additionally, the keyword 'greedy' is supported,
which would use the default portfolio from
`AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -233,6 +245,7 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
portfolio_selection=portfolio_selection,
)

def predict(
Expand Down
14 changes: 14 additions & 0 deletions autoPyTorch/api/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class TabularRegressionTask(BaseTask):
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -111,6 +114,7 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: Optional[str] = None,
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -175,6 +179,15 @@ def search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
portfolio_selection (str), (default=None):
This argument controls the initial configurations that
AutoPyTorch uses to warm start SMAC for hyperparameter
optimization. By default, no warm-starting happens.
The user can provide a path to a json file containing
configurations, similar to the default greedy portfolio file.
Additionally, the keyword 'greedy' is supported,
which would use the default portfolio from
`AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -221,6 +234,7 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
portfolio_selection=portfolio_selection,
)

def predict(
Expand Down
Loading

0 comments on commit 1e08fc9

Please sign in to comment.