From 0e8592bb3520db4af94f4d48aa7156b8dbb4dffa Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Wed, 4 Dec 2024 14:58:11 +0900 Subject: [PATCH 01/28] Update sampler.py --- package/samplers/hebo/sampler.py | 168 ++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 5 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index b76cd5bf..e9d61123 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -1,13 +1,19 @@ from __future__ import annotations from collections.abc import Sequence +from typing import Any +import warnings import numpy as np +import optuna from optuna.distributions import BaseDistribution from optuna.distributions import CategoricalDistribution from optuna.distributions import FloatDistribution from optuna.distributions import IntDistribution +from optuna.samplers import BaseSampler +from optuna.search_space import IntersectionSearchSpace from optuna.study import Study +from optuna.study._study_direction import StudyDirection from optuna.trial import FrozenTrial from optuna.trial import TrialState import optunahub @@ -18,11 +24,70 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): - def __init__(self, search_space: dict[str, BaseDistribution]) -> None: - super().__init__(search_space) - self._hebo = HEBO(self._convert_to_hebo_design_space(search_space)) + """A sampler using `HEBO __` as the backend. - def sample_relative( + For further information about HEBO algorithm, please refer to the following papers: + - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021).__` + + Args: + search_space: + A search space required for Define-and-Run manner. Default is :obj:`None`. + + seed: + A seed for ``HEBOSampler``. Default is :obj:`None`. + + constant_liar: + If :obj:`True`, penalize running trials to avoid suggesting parameter configurations + nearby. Default is :obj:`False`. + + .. note:: + Abnormally terminated trials often leave behind a record with a state of + ``RUNNING`` in the storage. + Such "zombie" trial parameters will be avoided by the constant liar algorithm + during subsequent sampling. + When using an :class:`~optuna.storages.RDBStorage`, it is possible to enable the + ``heartbeat_interval`` to change the records for abnormally terminated trials to + ``FAIL``. + (This note is quoted from `TPESampler __`.) + + .. note:: + It is recommended to set this value to :obj:`True` during distributed + optimization to avoid having multiple workers evaluating similar parameter + configurations. In particular, if each objective function evaluation is costly + and the durations of the running states are significant, and/or the number of + workers is high. + (This note is quoted from `TPESampler __`.) + + .. note:: + HEBO algorithm involves multi-objective optimization of multiple acquisition functions. + While `constant_liar` is a simple way to get diverse params for parallel optimization, + it may not be the best approach for HEBO. + + independent_sampler: + A :class:`~optuna.samplers.BaseSampler` instance that is used for independent + sampling. The parameters not contained in the relative search space are sampled + by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler` + is used as the default. + """ # NOQA + def __init__(self, + search_space: dict[str, BaseDistribution] | None = None, + seed: int | None = None, + constant_liar: bool = False, + independent_sampler: BaseSampler | None = None, + ) -> None: + super().__init__(search_space, seed) + if search_space is not None and constant_liar is False: + self._hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) + else: + self._hebo = None + self._intersection_search_space = IntersectionSearchSpace() + self._independent_sampler = ( + independent_sampler or optuna.samplers.RandomSampler(seed=seed) + ) + self._is_independent_sampler_specified = independent_sampler is not None + self._constant_liar = constant_liar + + def _sample_relative_define_and_run( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: params_pd = self._hebo.suggest() @@ -32,6 +97,72 @@ def sample_relative( params[name] = params_pd[name].to_numpy()[0] return params + def _sample_relative_stateless( + self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] + ) -> dict[str, float]: + if self._constant_liar: + target_states = [TrialState.COMPLETE, TrialState.RUNNING] + else: + target_states = [TrialState.COMPLETE] + + trials = study.get_trials(deepcopy=False, states=target_states) + if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1: + # note: The backend HEBO implementation use Sobol sampling here. + # This sampler does not call `hebo.suggest()` here because + # Optuna needs to know search space by running the first trial in Define-by-Run. + return {} + + # Assume that the back-end HEBO implementation aims to minimize. + if study.direction == StudyDirection.MINIMIZE: + worst_values = max( + [t.values for t in trials if t.state == TrialState.COMPLETE] + ) + else: + worst_values = min( + [t.values for t in trials if t.state == TrialState.COMPLETE] + ) + sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 + + hebo = HEBO( + self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed + ) + for t in trials: + if t.state == TrialState.COMPLETE: + hebo_params = {name: t.params[name] for name in search_space.keys()} + hebo.observe( + pd.DataFrame([hebo_params]), + np.asarray([x * sign for x in t.values]), + ) + elif t.state == TrialState.RUNNING: + try: + hebo_params = {name: t.params[name] for name in search_space.keys()} + except: # NOQA + # There are one or more params which are not suggested yet. + continue + # If `constant_liar == True`, assume that the RUNNING params result in bad values, + # thus preventing the simultaneous suggestion of (almost) the same params + # during parallel execution. + hebo.observe(pd.DataFrame([hebo_params]), np.asarray([worst_values])) + else: + assert False + params_pd = hebo.suggest() + params = {} + for name in search_space.keys(): + params[name] = params_pd[name].to_numpy()[0] + return params + + def sample_relative( + self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] + ) -> dict[str, float]: + if study._is_multi_objective(): + raise ValueError( + "This function does not support multi-objective optimization study." + ) + if self._hebo is None or self._constant_liar is True: + return self._sample_relative_stateless(study, trial, search_space) + else: + return self._sample_relative_define_and_run(study, trial, search_space) + def after_trial( self, study: Study, @@ -39,7 +170,11 @@ def after_trial( state: TrialState, values: Sequence[float] | None, ) -> None: - self._hebo.observe(pd.DataFrame([trial.params]), np.asarray([values])) + if self._hebo is not None: + # Assume that the back-end HEBO implementation aims to minimize. + if study.direction == StudyDirection.MAXIMIZE: + values = [-x for x in values] + self._hebo.observe(pd.DataFrame([trial.params]), np.asarray([values])) def _convert_to_hebo_design_space( self, search_space: dict[str, BaseDistribution] @@ -103,3 +238,26 @@ def _convert_to_hebo_design_space( else: raise NotImplementedError(f"Unsupported distribution: {distribution}") return DesignSpace().parse(design_space) + + def infer_relative_search_space( + self, study: Study, trial: FrozenTrial + ) -> dict[str, BaseDistribution]: + return optuna.search_space.intersection_search_space( + study._get_trials(deepcopy=False, use_cache=True) + ) + + def sample_independent( + self, + study: Study, + trial: FrozenTrial, + param_name: str, + param_distribution: BaseDistribution, + ) -> Any: + if not self._is_independent_sampler_specified: + warnings.warn( + "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?" + ) + + return self._independent_sampler.sample_independent( + study, trial, param_name, param_distribution + ) From 1e4f749d7b89b9e3c8ce9d903fea68d27a012209 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Wed, 4 Dec 2024 16:18:12 +0900 Subject: [PATCH 02/28] fix lint --- package/samplers/hebo/sampler.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index e9d61123..f7cfe3ee 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -69,7 +69,9 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler` is used as the default. """ # NOQA - def __init__(self, + + def __init__( + self, search_space: dict[str, BaseDistribution] | None = None, seed: int | None = None, constant_liar: bool = False, @@ -81,9 +83,7 @@ def __init__(self, else: self._hebo = None self._intersection_search_space = IntersectionSearchSpace() - self._independent_sampler = ( - independent_sampler or optuna.samplers.RandomSampler(seed=seed) - ) + self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed) self._is_independent_sampler_specified = independent_sampler is not None self._constant_liar = constant_liar @@ -114,18 +114,12 @@ def _sample_relative_stateless( # Assume that the back-end HEBO implementation aims to minimize. if study.direction == StudyDirection.MINIMIZE: - worst_values = max( - [t.values for t in trials if t.state == TrialState.COMPLETE] - ) + worst_values = max([t.values for t in trials if t.state == TrialState.COMPLETE]) else: - worst_values = min( - [t.values for t in trials if t.state == TrialState.COMPLETE] - ) + worst_values = min([t.values for t in trials if t.state == TrialState.COMPLETE]) sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 - hebo = HEBO( - self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed - ) + hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed) for t in trials: if t.state == TrialState.COMPLETE: hebo_params = {name: t.params[name] for name in search_space.keys()} @@ -155,9 +149,7 @@ def sample_relative( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: if study._is_multi_objective(): - raise ValueError( - "This function does not support multi-objective optimization study." - ) + raise ValueError("This function does not support multi-objective optimization study.") if self._hebo is None or self._constant_liar is True: return self._sample_relative_stateless(study, trial, search_space) else: From 320872e13a0176be49ea661d6977959e3b2e268c Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Wed, 4 Dec 2024 16:23:19 +0900 Subject: [PATCH 03/28] fix mypy --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index f7cfe3ee..f1f4116d 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -162,7 +162,7 @@ def after_trial( state: TrialState, values: Sequence[float] | None, ) -> None: - if self._hebo is not None: + if self._hebo is not None and values is not None: # Assume that the back-end HEBO implementation aims to minimize. if study.direction == StudyDirection.MAXIMIZE: values = [-x for x in values] From 120f9680e04f0588b57d08f98ba455c3fca28bc5 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Thu, 5 Dec 2024 11:24:40 +0900 Subject: [PATCH 04/28] Update README.md --- package/samplers/hebo/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index b15659d3..3cdd9578 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -20,6 +20,26 @@ cd HEBO/HEBO pip install -e . ``` +## APIs + +- `HEBOSampler(*, search_space: dict[str, BaseDistribution] | None = None, seed: int | None = None, constant_liar: bool = False, independent_sampler: BaseSampler | None = None)` + - `search_space`: A search space required for Define-and-Run manner. + + Example: + ```python + search_space = { + "x": optuna.distributions.FloatDistribution(-5, 5), + "y": optuna.distributions.FloatDistribution(-5, 5), + } + HEBOSampler(search_space=search_space) + ``` + - `seed`: Seed for random number generator. + - `constant_liar`: If `True`, penalize running trials to avoid suggesting parameter configurations nearby. Default is `False`. + - Note: Abnormally terminated trials often leave behind a record with a state of `RUNNING` in the storage. Such "zombie" trial parameters will be avoided by the constant liar algorithm during subsequent sampling. When using an `optuna.storages.RDBStorage`, it is possible to enable the `heartbeat_interval` to change the records for abnormally terminated trials to `FAIL`. (This note is quoted from [TPESampler](https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L215-L222).) + - Note: It is recommended to set this value to `True` during distributed optimization to avoid having multiple workers evaluating similar parameter configurations. In particular, if each objective function evaluation is costly and the durations of the running states are significant, and/or the number of workers is high. (This note is quoted from [TPESampler](https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L224-L229).) + - Note: HEBO algorithm involves multi-objective optimization of multiple acquisition functions. While `constant_liar` is a simple way to get diverse params for parallel optimization, it may not be the best approach for HEBO. + - `independent_sampler`: A `optuna.samplers.BaseSampler` instance that is used for independent sampling. The parameters not contained in the relative search space are sampled by this sampler. If `None` is specified, `optuna.samplers.RandomSampler` is used as the default. + ## Example ```python From 7855dcef38bd2d2aaba61674727e996d9916a0b0 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Thu, 5 Dec 2024 11:27:48 +0900 Subject: [PATCH 05/28] fix lint --- package/samplers/hebo/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index 3cdd9578..bcbe764e 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -26,6 +26,7 @@ pip install -e . - `search_space`: A search space required for Define-and-Run manner. Example: + ```python search_space = { "x": optuna.distributions.FloatDistribution(-5, 5), @@ -33,11 +34,15 @@ pip install -e . } HEBOSampler(search_space=search_space) ``` + - `seed`: Seed for random number generator. + - `constant_liar`: If `True`, penalize running trials to avoid suggesting parameter configurations nearby. Default is `False`. + - Note: Abnormally terminated trials often leave behind a record with a state of `RUNNING` in the storage. Such "zombie" trial parameters will be avoided by the constant liar algorithm during subsequent sampling. When using an `optuna.storages.RDBStorage`, it is possible to enable the `heartbeat_interval` to change the records for abnormally terminated trials to `FAIL`. (This note is quoted from [TPESampler](https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L215-L222).) - Note: It is recommended to set this value to `True` during distributed optimization to avoid having multiple workers evaluating similar parameter configurations. In particular, if each objective function evaluation is costly and the durations of the running states are significant, and/or the number of workers is high. (This note is quoted from [TPESampler](https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L224-L229).) - Note: HEBO algorithm involves multi-objective optimization of multiple acquisition functions. While `constant_liar` is a simple way to get diverse params for parallel optimization, it may not be the best approach for HEBO. + - `independent_sampler`: A `optuna.samplers.BaseSampler` instance that is used for independent sampling. The parameters not contained in the relative search space are sampled by this sampler. If `None` is specified, `optuna.samplers.RandomSampler` is used as the default. ## Example From e5ff63a01d359ea2801d8e4915e7c67523fe102c Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Thu, 5 Dec 2024 13:34:53 +0900 Subject: [PATCH 06/28] Update README.md --- package/samplers/hebo/README.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index bcbe764e..4b752adf 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -48,13 +48,25 @@ pip install -e . ## Example ```python -search_space = { - "x": FloatDistribution(-10, 10), - "y": IntDistribution(0, 10), +import optuna +import optunahub -} -sampler = HEBOSampler(search_space) + +def objective(trial: optuna.trial.Trial) -> float: + x = trial.suggest_float("x", -10, 10) + y = trial.suggest_int("y", -10, 10) + return x**2 + y**2 + + +module = optunahub.load_module("samplers/hebo") +sampler = module.HEBOSampler(search_space={ + "x": optuna.distributions.FloatDistribution(-10, 10), + "y": optuna.distributions.IntDistribution(-10, 10), +}) study = optuna.create_study(sampler=sampler) +study.optimize(objective, n_trials=100) + +print(study.best_trial.params, study.best_trial.value) ``` See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo/example.py) for a full example. From ae5e5a67170ded233bc24b911e163ef32f224d71 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:23:24 +0900 Subject: [PATCH 07/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index f1f4116d..deec86a6 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -247,7 +247,7 @@ def sample_independent( ) -> Any: if not self._is_independent_sampler_specified: warnings.warn( - "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?" + "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space." ) return self._independent_sampler.sample_independent( From f339b9b6f3821f4a59c09f65196df93f9d2c3d41 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:23:54 +0900 Subject: [PATCH 08/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index deec86a6..8424a956 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -149,7 +149,7 @@ def sample_relative( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: if study._is_multi_objective(): - raise ValueError("This function does not support multi-objective optimization study.") + raise ValueError(f"{self.__class__.__name__} has not supported multi-objective optimization.") if self._hebo is None or self._constant_liar is True: return self._sample_relative_stateless(study, trial, search_space) else: From 0574576df08efe6ac157e641d70eb26100f6efec Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:24:13 +0900 Subject: [PATCH 09/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 8424a956..7f13c1f9 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -78,7 +78,7 @@ def __init__( independent_sampler: BaseSampler | None = None, ) -> None: super().__init__(search_space, seed) - if search_space is not None and constant_liar is False: + if search_space is not None and not constant_liar: self._hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) else: self._hebo = None From cd4f8f947962b2e95858e4c1e5cae4ecb3324282 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:24:32 +0900 Subject: [PATCH 10/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 7f13c1f9..380987f3 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -26,7 +26,7 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): """A sampler using `HEBO __` as the backend. - For further information about HEBO algorithm, please refer to the following papers: + For further information about HEBO algorithm, please refer to the following paper: - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021).__` Args: From 02f146bb5cf075f21a2297a95fb9299a837f66a2 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:26:05 +0900 Subject: [PATCH 11/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 380987f3..346e5f56 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -73,6 +73,7 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): def __init__( self, search_space: dict[str, BaseDistribution] | None = None, + *, seed: int | None = None, constant_liar: bool = False, independent_sampler: BaseSampler | None = None, From 8d2cf0bb7046598c096b693cd3a8ff927ea76cd6 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:26:18 +0900 Subject: [PATCH 12/28] Update package/samplers/hebo/README.md Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index 4b752adf..9cf48a97 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -22,7 +22,7 @@ pip install -e . ## APIs -- `HEBOSampler(*, search_space: dict[str, BaseDistribution] | None = None, seed: int | None = None, constant_liar: bool = False, independent_sampler: BaseSampler | None = None)` +- `HEBOSampler(search_space: dict[str, BaseDistribution] | None = None, *, seed: int | None = None, constant_liar: bool = False, independent_sampler: BaseSampler | None = None)` - `search_space`: A search space required for Define-and-Run manner. Example: From 2a2069bd2a1476e71fc91b019a4801331ec0c1b0 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:27:21 +0900 Subject: [PATCH 13/28] Update package/samplers/hebo/README.md Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index 9cf48a97..e66f7a07 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -23,7 +23,7 @@ pip install -e . ## APIs - `HEBOSampler(search_space: dict[str, BaseDistribution] | None = None, *, seed: int | None = None, constant_liar: bool = False, independent_sampler: BaseSampler | None = None)` - - `search_space`: A search space required for Define-and-Run manner. + - `search_space`: By specifying search_space, the sampling speed at each iteration becomes slightly quicker, but this argument is not necessary to run this sampler. Example: From 0aa860ff0e7194d284ee4f984630e817e78d5719 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:27:30 +0900 Subject: [PATCH 14/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 346e5f56..a4090b7f 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -31,7 +31,7 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): Args: search_space: - A search space required for Define-and-Run manner. Default is :obj:`None`. + By specifying search_space, the sampling speed at each iteration becomes slightly quicker, but this argument is not necessary to run this sampler. Default is :obj:`None`. seed: A seed for ``HEBOSampler``. Default is :obj:`None`. From 95800b5f45dae1a850d0139aef0605fd9a269a74 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:28:33 +0900 Subject: [PATCH 15/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index a4090b7f..e0b06a73 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -27,7 +27,7 @@ class HEBOSampler(optunahub.samplers.SimpleBaseSampler): """A sampler using `HEBO __` as the backend. For further information about HEBO algorithm, please refer to the following paper: - - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021).__` + - `HEBO Pushing The Limits of Sample-Efficient Hyperparameter Optimisation __` Args: search_space: From 810d701c9f9a16497ae78c0ffccb569a4e7464f8 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:28:51 +0900 Subject: [PATCH 16/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index e0b06a73..7d5732c1 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -108,7 +108,7 @@ def _sample_relative_stateless( trials = study.get_trials(deepcopy=False, states=target_states) if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1: - # note: The backend HEBO implementation use Sobol sampling here. + # note: The backend HEBO implementation uses Sobol sampling here. # This sampler does not call `hebo.suggest()` here because # Optuna needs to know search space by running the first trial in Define-by-Run. return {} From 39f2671affeee116bd609d96091d01fe6711d16a Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:29:05 +0900 Subject: [PATCH 17/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 7d5732c1..5e2c48a0 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -115,9 +115,9 @@ def _sample_relative_stateless( # Assume that the back-end HEBO implementation aims to minimize. if study.direction == StudyDirection.MINIMIZE: - worst_values = max([t.values for t in trials if t.state == TrialState.COMPLETE]) + worst_value = max(t.value for t in trials if t.state == TrialState.COMPLETE) else: - worst_values = min([t.values for t in trials if t.state == TrialState.COMPLETE]) + worst_value = min(t.value for t in trials if t.state == TrialState.COMPLETE) sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed) From d9c00b21ac030aa2ca19bca6099924ed22c26730 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:29:52 +0900 Subject: [PATCH 18/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 5e2c48a0..d23eece0 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -87,6 +87,7 @@ def __init__( self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed) self._is_independent_sampler_specified = independent_sampler is not None self._constant_liar = constant_liar + self._rng = np.random.default_rng(seed) def _sample_relative_define_and_run( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] From a82754e2a448c9a972d12b90fdbc0910597a0b7f Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:30:07 +0900 Subject: [PATCH 19/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index d23eece0..8e2e6ea6 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -121,7 +121,8 @@ def _sample_relative_stateless( worst_value = min(t.value for t in trials if t.state == TrialState.COMPLETE) sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 - hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed) + seed = self._rng.randint((1 << 31) - 1) + hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) for t in trials: if t.state == TrialState.COMPLETE: hebo_params = {name: t.params[name] for name in search_space.keys()} From 4e1a019c3b032aeb256e7667767d7138341be913 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:43:10 +0900 Subject: [PATCH 20/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 8e2e6ea6..1905be9c 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -107,7 +107,8 @@ def _sample_relative_stateless( else: target_states = [TrialState.COMPLETE] - trials = study.get_trials(deepcopy=False, states=target_states) + use_cache = not self._constant_liar + trials = study._get_trials(deepcopy=False, states=states, use_cache=use_cache) if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1: # note: The backend HEBO implementation uses Sobol sampling here. # This sampler does not call `hebo.suggest()` here because From ee03025f8280496b9d62f868d2171b4d4740ac4b Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 17:44:24 +0900 Subject: [PATCH 21/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 1905be9c..057f0d97 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -124,25 +124,10 @@ def _sample_relative_stateless( seed = self._rng.randint((1 << 31) - 1) hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) - for t in trials: - if t.state == TrialState.COMPLETE: - hebo_params = {name: t.params[name] for name in search_space.keys()} - hebo.observe( - pd.DataFrame([hebo_params]), - np.asarray([x * sign for x in t.values]), - ) - elif t.state == TrialState.RUNNING: - try: - hebo_params = {name: t.params[name] for name in search_space.keys()} - except: # NOQA - # There are one or more params which are not suggested yet. - continue - # If `constant_liar == True`, assume that the RUNNING params result in bad values, - # thus preventing the simultaneous suggestion of (almost) the same params - # during parallel execution. - hebo.observe(pd.DataFrame([hebo_params]), np.asarray([worst_values])) - else: - assert False + valid_trials = [t.params for t in trials if all(name in trial.params for name in search_space)] + params = pd.DataFrame([t.params for t in valid_trials]) + values = np.array([sign * t.value if t.state == TrialState.COMPLETE else worst_value for t in valid_trials]) + hebo.observe(params, values) params_pd = hebo.suggest() params = {} for name in search_space.keys(): From 41ce715d442a2903c5f5aed30eff03c649399bc5 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 18:07:05 +0900 Subject: [PATCH 22/28] Update sampler.py --- package/samplers/hebo/sampler.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 057f0d97..bc5b6028 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -85,19 +85,14 @@ def __init__( self._hebo = None self._intersection_search_space = IntersectionSearchSpace() self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed) - self._is_independent_sampler_specified = independent_sampler is not None + self._is_fallback_inevitable = False self._constant_liar = constant_liar self._rng = np.random.default_rng(seed) def _sample_relative_define_and_run( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: - params_pd = self._hebo.suggest() - - params = {} - for name in search_space.keys(): - params[name] = params_pd[name].to_numpy()[0] - return params + return {name: row.iloc[0] for name, row in self._hebo.suggest().items() if name in search_space.keys()} def _sample_relative_stateless( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] @@ -108,12 +103,15 @@ def _sample_relative_stateless( target_states = [TrialState.COMPLETE] use_cache = not self._constant_liar - trials = study._get_trials(deepcopy=False, states=states, use_cache=use_cache) + trials = study._get_trials(deepcopy=False, states=target_states, use_cache=use_cache) if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1: # note: The backend HEBO implementation uses Sobol sampling here. # This sampler does not call `hebo.suggest()` here because # Optuna needs to know search space by running the first trial in Define-by-Run. + self._is_fallback_inevitable = True return {} + else: + self._is_fallback_inevitable = False # Assume that the back-end HEBO implementation aims to minimize. if study.direction == StudyDirection.MINIMIZE: @@ -128,11 +126,7 @@ def _sample_relative_stateless( params = pd.DataFrame([t.params for t in valid_trials]) values = np.array([sign * t.value if t.state == TrialState.COMPLETE else worst_value for t in valid_trials]) hebo.observe(params, values) - params_pd = hebo.suggest() - params = {} - for name in search_space.keys(): - params[name] = params_pd[name].to_numpy()[0] - return params + return {name: row.iloc[0] for name, row in hebo.suggest().items() if name in search_space.keys()} def sample_relative( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] @@ -234,7 +228,7 @@ def sample_independent( param_name: str, param_distribution: BaseDistribution, ) -> Any: - if not self._is_independent_sampler_specified: + if not self._is_fallback_inevitable: warnings.warn( "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space." ) From 9803705ca63196180fcdf635868126c287203e0e Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 18:10:06 +0900 Subject: [PATCH 23/28] fix ruff --- package/samplers/hebo/sampler.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index bc5b6028..7b0a3d15 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -92,7 +92,11 @@ def __init__( def _sample_relative_define_and_run( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: - return {name: row.iloc[0] for name, row in self._hebo.suggest().items() if name in search_space.keys()} + return { + name: row.iloc[0] + for name, row in self._hebo.suggest().items() + if name in search_space.keys() + } def _sample_relative_stateless( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] @@ -122,17 +126,30 @@ def _sample_relative_stateless( seed = self._rng.randint((1 << 31) - 1) hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) - valid_trials = [t.params for t in trials if all(name in trial.params for name in search_space)] + valid_trials = [ + t.params for t in trials if all(name in trial.params for name in search_space) + ] params = pd.DataFrame([t.params for t in valid_trials]) - values = np.array([sign * t.value if t.state == TrialState.COMPLETE else worst_value for t in valid_trials]) + values = np.array( + [ + sign * t.value if t.state == TrialState.COMPLETE else worst_value + for t in valid_trials + ] + ) hebo.observe(params, values) - return {name: row.iloc[0] for name, row in hebo.suggest().items() if name in search_space.keys()} + return { + name: row.iloc[0] + for name, row in hebo.suggest().items() + if name in search_space.keys() + } def sample_relative( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] ) -> dict[str, float]: if study._is_multi_objective(): - raise ValueError(f"{self.__class__.__name__} has not supported multi-objective optimization.") + raise ValueError( + f"{self.__class__.__name__} has not supported multi-objective optimization." + ) if self._hebo is None or self._constant_liar is True: return self._sample_relative_stateless(study, trial, search_space) else: From a46a02e9a99ad9ba8afcfc573b67ad9f9ee7eeb4 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Sun, 8 Dec 2024 18:25:35 +0900 Subject: [PATCH 24/28] Update README.md --- package/samplers/hebo/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index e66f7a07..c60ea858 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -63,6 +63,7 @@ sampler = module.HEBOSampler(search_space={ "x": optuna.distributions.FloatDistribution(-10, 10), "y": optuna.distributions.IntDistribution(-10, 10), }) +# sampler = module.HEBOSampler() # Note: `search_space` is not required, and thus it works too. study = optuna.create_study(sampler=sampler) study.optimize(objective, n_trials=100) From 1de364d12817a7ba570c0edf4ae420b4761e6ad1 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Tue, 10 Dec 2024 12:18:41 +0900 Subject: [PATCH 25/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 7b0a3d15..641fd39f 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -126,10 +126,9 @@ def _sample_relative_stateless( seed = self._rng.randint((1 << 31) - 1) hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) - valid_trials = [ + params = pd.DataFrame([ t.params for t in trials if all(name in trial.params for name in search_space) - ] - params = pd.DataFrame([t.params for t in valid_trials]) + ]) values = np.array( [ sign * t.value if t.state == TrialState.COMPLETE else worst_value From 2d52a31c96c3d3aa6583dc6b282eff15fc8bb851 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Tue, 10 Dec 2024 12:20:57 +0900 Subject: [PATCH 26/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 641fd39f..4600a514 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -108,20 +108,19 @@ def _sample_relative_stateless( use_cache = not self._constant_liar trials = study._get_trials(deepcopy=False, states=target_states, use_cache=use_cache) - if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1: + is_complete = np.array([t.state == TrialState.COMPLETE for t in trials]) + if not np.any(is_complete): # note: The backend HEBO implementation uses Sobol sampling here. # This sampler does not call `hebo.suggest()` here because # Optuna needs to know search space by running the first trial in Define-by-Run. - self._is_fallback_inevitable = True + self._is_independent_sample_necessary = True return {} else: - self._is_fallback_inevitable = False + self._is_independent_sample_necessary = False # Assume that the back-end HEBO implementation aims to minimize. - if study.direction == StudyDirection.MINIMIZE: - worst_value = max(t.value for t in trials if t.state == TrialState.COMPLETE) - else: - worst_value = min(t.value for t in trials if t.state == TrialState.COMPLETE) + values = np.array([t.value if t.state == TrialState.COMPLETE else np.nan for t in trials]) + worst_value = np.nanmax(values) if study.direction == StudyDirection.MINIMIZE else np.nanmin(values) sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 seed = self._rng.randint((1 << 31) - 1) From 34ec5f3261417b51c5ceb17b3e2fbaaa2acc79a5 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Tue, 10 Dec 2024 12:21:34 +0900 Subject: [PATCH 27/28] Update package/samplers/hebo/sampler.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- package/samplers/hebo/sampler.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 4600a514..817a2ccd 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -128,12 +128,8 @@ def _sample_relative_stateless( params = pd.DataFrame([ t.params for t in trials if all(name in trial.params for name in search_space) ]) - values = np.array( - [ - sign * t.value if t.state == TrialState.COMPLETE else worst_value - for t in valid_trials - ] - ) + values[np.isnan(values)] = worst_value + values *= sign hebo.observe(params, values) return { name: row.iloc[0] From bb3b9f8b61f58202c4faca46bc6969d8bdd909c4 Mon Sep 17 00:00:00 2001 From: Hiroki Takizawa Date: Tue, 10 Dec 2024 13:16:33 +0900 Subject: [PATCH 28/28] fix lint --- package/samplers/hebo/sampler.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index 817a2ccd..f77a2d2a 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -85,7 +85,7 @@ def __init__( self._hebo = None self._intersection_search_space = IntersectionSearchSpace() self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed) - self._is_fallback_inevitable = False + self._is_independent_sample_necessary = False self._constant_liar = constant_liar self._rng = np.random.default_rng(seed) @@ -117,17 +117,18 @@ def _sample_relative_stateless( return {} else: self._is_independent_sample_necessary = False + trials = [t for t in trials if set(search_space.keys()) <= set(t.params.keys())] # Assume that the back-end HEBO implementation aims to minimize. values = np.array([t.value if t.state == TrialState.COMPLETE else np.nan for t in trials]) - worst_value = np.nanmax(values) if study.direction == StudyDirection.MINIMIZE else np.nanmin(values) + worst_value = ( + np.nanmax(values) if study.direction == StudyDirection.MINIMIZE else np.nanmin(values) + ) sign = 1 if study.direction == StudyDirection.MINIMIZE else -1 - seed = self._rng.randint((1 << 31) - 1) + seed = int(self._rng.integers(low=1, high=(1 << 31))) hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=seed) - params = pd.DataFrame([ - t.params for t in trials if all(name in trial.params for name in search_space) - ]) + params = pd.DataFrame([t.params for t in trials]) values[np.isnan(values)] = worst_value values *= sign hebo.observe(params, values) @@ -239,7 +240,7 @@ def sample_independent( param_name: str, param_distribution: BaseDistribution, ) -> Any: - if not self._is_fallback_inevitable: + if not self._is_independent_sample_necessary: warnings.warn( "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space." )