diff --git a/package/samplers/hebo_base_sampler/LICENSE b/package/samplers/hebo_base_sampler/LICENSE new file mode 100644 index 00000000..f763c760 --- /dev/null +++ b/package/samplers/hebo_base_sampler/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Hiroki Takizawa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/package/samplers/hebo_base_sampler/README.md b/package/samplers/hebo_base_sampler/README.md new file mode 100644 index 00000000..bd869228 --- /dev/null +++ b/package/samplers/hebo_base_sampler/README.md @@ -0,0 +1,47 @@ +--- +author: Hiroki Takizawa +title: HEBO (Heteroscedastic and Evolutionary Bayesian Optimisation) supporting Define-by-Run and parallelization +description: This package offers HEBO algorithm using BaseSampler and supports parallelization in exchange for increased computation. 
+tags: [sampler, Bayesian optimization, Heteroscedastic Gaussian process, Evolutionary algorithm] +optuna_versions: [4.1.0] +license: MIT License +--- + +## Class or Function Names + +- HEBOSampler + +## Installation + +```bash +pip install -r https://hub.optuna.org/samplers/hebo_base_sampler/requirements.txt +git clone git@github.com:huawei-noah/HEBO.git +cd HEBO/HEBO +pip install -e . +``` + +## Example + +```python +def objective(trial): + x = trial.suggest_float("x", -1, 1) + y = trial.suggest_int("y", -1, 1) + sleep(1.0) + return x ** 2 + y +sampler = HEBOSampler(constant_liar=True) +study = optuna.create_study(sampler=sampler) +study.optimize(objective, n_trials=20, n_jobs=2) +``` + +See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo_base_sampler/example.py) for a full example. + +## Others + +This package is based on [the preceding HEBO package](https://hub.optuna.org/samplers/hebo/) authored by HideakiImamura. + +HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). +Please refer to [the official repository of HEBO](https://github.com/huawei-noah/HEBO/tree/master/HEBO) for more details. + +### Reference + +Cowen-Rivers, Alexander I., et al. "An Empirical Study of Assumptions in Bayesian Optimisation." arXiv preprint arXiv:2012.03826 (2021). 
diff --git a/package/samplers/hebo_base_sampler/__init__.py b/package/samplers/hebo_base_sampler/__init__.py new file mode 100644 index 00000000..b3a2fabf --- /dev/null +++ b/package/samplers/hebo_base_sampler/__init__.py @@ -0,0 +1,4 @@ +from .sampler import HEBOSampler + + +__all__ = ["HEBOSampler"] diff --git a/package/samplers/hebo_base_sampler/example.py b/package/samplers/hebo_base_sampler/example.py new file mode 100644 index 00000000..67e2f8ed --- /dev/null +++ b/package/samplers/hebo_base_sampler/example.py @@ -0,0 +1,25 @@ +import time + +import optuna +import optunahub + + +module = optunahub.load_module("samplers/hebo_base_sampler") +HEBOSampler = module.HEBOSampler + + +def objective(trial: optuna.trial.Trial) -> float: + x = trial.suggest_float("x", -10, 10) + y = trial.suggest_int("y", -10, 10) + time.sleep(1.0) + return x**2 + y**2 + + +if __name__ == "__main__": + sampler = HEBOSampler(constant_liar=True) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=100, n_jobs=2) + print(study.best_trial.params) + + fig = optuna.visualization.plot_optimization_history(study) + fig.write_image("hebo_optimization_history.png") diff --git a/package/samplers/hebo_base_sampler/requirements.txt b/package/samplers/hebo_base_sampler/requirements.txt new file mode 100644 index 00000000..2ba8ec1a --- /dev/null +++ b/package/samplers/hebo_base_sampler/requirements.txt @@ -0,0 +1,3 @@ +optuna +optunahub +hebo@git+https://github.com/huawei-noah/HEBO.git@v0.3.6#subdirectory=HEBO diff --git a/package/samplers/hebo_base_sampler/sampler.py b/package/samplers/hebo_base_sampler/sampler.py new file mode 100644 index 00000000..f3ba04f1 --- /dev/null +++ b/package/samplers/hebo_base_sampler/sampler.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +from typing import Any +import warnings + +from hebo.design_space.design_space import DesignSpace +from hebo.optimizers.hebo import HEBO +import numpy as np +import optuna +from 
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.distributions import FloatDistribution
from optuna.distributions import IntDistribution
from optuna.samplers import BaseSampler
from optuna.search_space import IntersectionSearchSpace
from optuna.study import Study
from optuna.study._study_direction import StudyDirection
from optuna.trial import FrozenTrial
from optuna.trial import TrialState
import pandas as pd


class HEBOSampler(BaseSampler):  # type: ignore
    """A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>`__ as the backend.

    A fresh HEBO optimizer is built on every call to :meth:`sample_relative` and fed with
    the trials stored in the study, so the sampler itself keeps no optimizer state between
    trials. Only single-objective studies are supported.

    For further information about HEBO algorithm, please refer to the following papers:
    - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021). <https://arxiv.org/abs/2012.03826>`__

    Args:
        seed:
            A seed for ``HEBOSampler``. Default is :obj:`None`.
            It is passed to HEBO as ``scramble_seed`` and to the default
            :class:`~optuna.samplers.RandomSampler` used for independent sampling.

        constant_liar:
            If :obj:`True`, penalize running trials to avoid suggesting parameter configurations
            nearby.

            .. note::
                Abnormally terminated trials often leave behind a record with a state of
                ``RUNNING`` in the storage.
                Such "zombie" trial parameters will be avoided by the constant liar algorithm
                during subsequent sampling.
                When using an :class:`~optuna.storages.RDBStorage`, it is possible to enable the
                ``heartbeat_interval`` to change the records for abnormally terminated trials to
                ``FAIL``.
                (This note is quoted from `TPESampler <https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html>`__.)

            .. note::
                It is recommended to set this value to :obj:`True` during distributed
                optimization to avoid having multiple workers evaluating similar parameter
                configurations. In particular, if each objective function evaluation is costly
                and the durations of the running states are significant, and/or the number of
                workers is high.
                (This note is quoted from `TPESampler <https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html>`__.)

            .. note::
                HEBO algorithm involves multi-objective optimization of multiple acquisition functions.
                While `constant_liar` is a simple way to get diverse params for parallel optimization,
                it may not be the best approach for HEBO.

        independent_sampler:
            A :class:`~optuna.samplers.BaseSampler` instance that is used for independent
            sampling. The parameters not contained in the relative search space are sampled
            by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler`
            is used as the default.

    """  # NOQA

    def __init__(
        self,
        seed: int | None = None,
        constant_liar: bool = False,
        independent_sampler: BaseSampler | None = None,
    ) -> None:
        self._seed = seed
        # Not read by any method of this class; kept so existing attribute access still works.
        self._intersection_search_space = IntersectionSearchSpace()
        self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
        # Remembered so `sample_independent` only warns when the fallback was implicit.
        self._is_independent_sampler_specified = independent_sampler is not None
        self._constant_liar = constant_liar

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: dict[str, BaseDistribution],
    ) -> dict[str, Any]:
        """Suggest values for every parameter in ``search_space`` with HEBO.

        Args:
            study: Study whose stored trials are replayed into HEBO.
            trial: Trial being sampled (unused; required by the sampler interface).
            search_space: Relative search space, as returned by
                :meth:`infer_relative_search_space`.

        Returns:
            A mapping from parameter name to the suggested value. An empty mapping is
            returned when the search space is empty or no trial has completed yet, in which
            case every parameter is sampled independently.

        Raises:
            ValueError: If ``study`` is multi-objective.
        """
        if study._is_multi_objective():
            raise ValueError("This function does not support multi-objective optimization study.")
        if not search_space:
            # Nothing HEBO could model; let independent sampling handle every parameter.
            return {}

        target_states = [TrialState.COMPLETE]
        if self._constant_liar:
            target_states.append(TrialState.RUNNING)

        trials = study.get_trials(deepcopy=False, states=target_states)
        complete_trials = [t for t in trials if t.state == TrialState.COMPLETE]
        if not complete_trials:
            # note: The backend HEBO implementation use Sobol sampling here.
            # This sampler does not call `hebo.suggest()` here because
            # Optuna needs to know search space by running the first trial.
            return {}

        # The back-end HEBO implementation minimizes, so objective values are flipped for
        # maximization studies before they are observed.
        sign = 1.0 if study.direction == StudyDirection.MINIMIZE else -1.0
        # Worst completed value expressed in HEBO's (minimization) scale, so that the lie
        # told for RUNNING trials is never better than a real observation.
        worst_value = max(sign * t.values[0] for t in complete_trials)

        param_names = list(search_space)
        hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed)
        for t in trials:
            if t.state == TrialState.COMPLETE:
                hebo_params = {name: t.params[name] for name in param_names}
                observed = sign * t.values[0]
            else:
                # Only RUNNING trials reach here, and only when `constant_liar` is enabled.
                try:
                    hebo_params = {name: t.params[name] for name in param_names}
                except KeyError:
                    # There are params which is not suggested yet.
                    continue
                # Assume that the RUNNING params result in bad values, thus preventing the
                # simultaneous suggestion of (almost) the same params during parallel
                # execution.
                observed = worst_value
            hebo.observe(pd.DataFrame([hebo_params]), np.asarray([observed]))

        suggestion = hebo.suggest()
        return {name: suggestion[name].to_numpy()[0] for name in param_names}

    def _convert_to_hebo_design_space(
        self, search_space: dict[str, BaseDistribution]
    ) -> DesignSpace:
        """Translate an Optuna search space into a HEBO ``DesignSpace``.

        Args:
            search_space: Mapping from parameter name to Optuna distribution.

        Returns:
            The parsed HEBO design space with one entry per parameter.

        Raises:
            NotImplementedError: If a distribution is not a float, int or categorical one.
        """
        design_space: list[dict[str, Any]] = []
        for name, distribution in search_space.items():
            if isinstance(distribution, FloatDistribution):
                spec: dict[str, Any] = {
                    "name": name,
                    "type": "pow" if distribution.log else "num",
                    "lb": distribution.low,
                    "ub": distribution.high,
                }
            elif isinstance(distribution, IntDistribution):
                spec = {
                    "name": name,
                    "lb": distribution.low,
                    "ub": distribution.high,
                }
                if distribution.log:
                    spec["type"] = "pow_int"
                elif distribution.step != 1:
                    # `step` is always a positive integer (default 1), so a plain truthiness
                    # test would send every integer parameter down this branch.
                    spec["type"] = "step_int"
                    spec["step"] = distribution.step
                else:
                    spec["type"] = "int"
            elif isinstance(distribution, CategoricalDistribution):
                spec = {
                    "name": name,
                    "type": "cat",
                    "categories": distribution.choices,
                }
            else:
                raise NotImplementedError(f"Unsupported distribution: {distribution}")
            design_space.append(spec)
        return DesignSpace().parse(design_space)

    def infer_relative_search_space(
        self, study: Study, trial: FrozenTrial
    ) -> dict[str, BaseDistribution]:
        """Return the intersection search space computed from the study's trials.

        Args:
            study: Study whose trials define the search space.
            trial: Trial being sampled (unused; required by the sampler interface).
        """
        return optuna.search_space.intersection_search_space(
            study._get_trials(deepcopy=False, use_cache=True)
        )

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Sample one parameter with the independent sampler.

        A warning is emitted on every call when the caller did not pass an
        ``independent_sampler`` explicitly, because the value then comes from the default
        ``RandomSampler`` rather than from HEBO.

        Args:
            study: Study the trial belongs to.
            trial: Trial being sampled.
            param_name: Name of the parameter to sample.
            param_distribution: Distribution to sample the parameter from.

        Returns:
            The value produced by the independent sampler.
        """
        if not self._is_independent_sampler_specified:
            warnings.warn(
                "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?"
            )

        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )