Add HEBO sampler supporting Define-by-Run manner, maximization, parallelization, and constant_liar #195

Closed · wants to merge 19 commits
21 changes: 21 additions & 0 deletions package/samplers/hebo_base_sampler/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Hiroki Takizawa

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
47 changes: 47 additions & 0 deletions package/samplers/hebo_base_sampler/README.md
@@ -0,0 +1,47 @@
---
author: Hiroki Takizawa
title: HEBO (Heteroscedastic and Evolutionary Bayesian Optimisation) supporting Define-by-Run and parallelization
description: This package provides the HEBO algorithm as a BaseSampler subclass, supporting Define-by-Run search spaces and parallelization in exchange for increased computation.
tags: [sampler, Bayesian optimization, Heteroscedastic Gaussian process, Evolutionary algorithm]
optuna_versions: [4.1.0]
license: MIT License
---

## Class or Function Names

- HEBOSampler

## Installation

```bash
pip install -r https://hub.optuna.org/samplers/hebo_base_sampler/requirements.txt
git clone [email protected]:huawei-noah/HEBO.git
cd HEBO/HEBO
pip install -e .
```

## Example

```python
from time import sleep

import optuna
import optunahub


HEBOSampler = optunahub.load_module("samplers/hebo_base_sampler").HEBOSampler


def objective(trial):
    x = trial.suggest_float("x", -1, 1)
    y = trial.suggest_int("y", -1, 1)
    sleep(1.0)
    return x ** 2 + y


sampler = HEBOSampler(constant_liar=True)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=20, n_jobs=2)
```

See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo_base_sampler/example.py) for a full example.
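The sampler also supports maximization: internally it flips the sign of observed values before handing them to HEBO, which always minimizes. A minimal sketch of a maximization study (the objective and parameter names here are illustrative, not part of the package):

```python
import optuna
import optunahub

HEBOSampler = optunahub.load_module("samplers/hebo_base_sampler").HEBOSampler


def objective(trial):
    x = trial.suggest_float("x", -1, 1)
    return -((x - 0.5) ** 2)  # maximum at x = 0.5


study = optuna.create_study(direction="maximize", sampler=HEBOSampler())
study.optimize(objective, n_trials=20)
print(study.best_params)  # expected to approach {"x": 0.5}
```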

## Others

This package is based on [the preceding HEBO package](https://hub.optuna.org/samplers/hebo/) authored by HideakiImamura.

HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard).
Please refer to [the official repository of HEBO](https://github.com/huawei-noah/HEBO/tree/master/HEBO) for more details.

### Reference

Cowen-Rivers, Alexander I., et al. "An Empirical Study of Assumptions in Bayesian Optimisation." arXiv preprint arXiv:2012.03826 (2021).
4 changes: 4 additions & 0 deletions package/samplers/hebo_base_sampler/__init__.py
@@ -0,0 +1,4 @@
from .sampler import HEBOSampler


__all__ = ["HEBOSampler"]
25 changes: 25 additions & 0 deletions package/samplers/hebo_base_sampler/example.py
@@ -0,0 +1,25 @@
import time

import optuna
import optunahub


module = optunahub.load_module("samplers/hebo_base_sampler")
HEBOSampler = module.HEBOSampler


def objective(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    y = trial.suggest_int("y", -10, 10)
    time.sleep(1.0)
    return x**2 + y**2


if __name__ == "__main__":
    sampler = HEBOSampler(constant_liar=True)
    study = optuna.create_study(sampler=sampler)
    study.optimize(objective, n_trials=100, n_jobs=2)
    print(study.best_trial.params)

    fig = optuna.visualization.plot_optimization_history(study)
    fig.write_image("hebo_optimization_history.png")
3 changes: 3 additions & 0 deletions package/samplers/hebo_base_sampler/requirements.txt
@@ -0,0 +1,3 @@
optuna
optunahub
hebo@git+https://github.com/huawei-noah/[email protected]#subdirectory=HEBO
Contributor

How about using hebo?

Suggested change
- hebo@git+https://github.com/huawei-noah/HEBO.git@v0.3.6#subdirectory=HEBO
+ hebo==0.3.6

Contributor Author

This is the exact same code as the preceding HEBO package, so I would prefer to maintain it.

217 changes: 217 additions & 0 deletions package/samplers/hebo_base_sampler/sampler.py
@@ -0,0 +1,217 @@
from __future__ import annotations

from typing import Any
import warnings

from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO
import numpy as np
import optuna
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.distributions import FloatDistribution
from optuna.distributions import IntDistribution
from optuna.samplers import BaseSampler
from optuna.search_space import IntersectionSearchSpace
from optuna.study import Study
from optuna.study._study_direction import StudyDirection
from optuna.trial import FrozenTrial
from optuna.trial import TrialState
import pandas as pd


class HEBOSampler(BaseSampler):  # type: ignore
    """A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>`__ as the backend.

    For further information about the HEBO algorithm, please refer to the following paper:
    - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021). <https://arxiv.org/abs/2012.03826>`__

    Args:
        seed:
            A seed for ``HEBOSampler``. Default is :obj:`None`.

        constant_liar:
            If :obj:`True`, penalize running trials to avoid suggesting parameter configurations
            nearby.

            .. note::
                Abnormally terminated trials often leave behind a record with a state of
                ``RUNNING`` in the storage.
                Such "zombie" trial parameters will be avoided by the constant liar algorithm
                during subsequent sampling.
                When using an :class:`~optuna.storages.RDBStorage`, it is possible to enable the
                ``heartbeat_interval`` to change the records for abnormally terminated trials to
                ``FAIL``.
                (This note is quoted from `TPESampler <https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L215-L222>`__.)

            .. note::
                It is recommended to set this value to :obj:`True` during distributed
                optimization to avoid having multiple workers evaluating similar parameter
                configurations. In particular, enable it if each objective function evaluation
                is costly, if the durations of the running states are significant, and/or if
                the number of workers is high.
                (This note is quoted from `TPESampler <https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L224-L229>`__.)

            .. note::
                The HEBO algorithm involves multi-objective optimization of multiple acquisition
                functions. While ``constant_liar`` is a simple way to get diverse params for
                parallel optimization, it may not be the best approach for HEBO.

        independent_sampler:
            A :class:`~optuna.samplers.BaseSampler` instance that is used for independent
            sampling. The parameters not contained in the relative search space are sampled
            by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler`
            is used as the default.
    """  # NOQA

    def __init__(
        self,
        seed: int | None = None,
        constant_liar: bool = False,
        independent_sampler: BaseSampler | None = None,
    ) -> None:
        self._seed = seed
        self._intersection_search_space = IntersectionSearchSpace()
        self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
        self._is_independent_sampler_specified = independent_sampler is not None
        self._constant_liar = constant_liar

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: dict[str, BaseDistribution],
    ) -> dict[str, float]:
        if study._is_multi_objective():
            raise ValueError("This function does not support multi-objective optimization study.")
        if self._constant_liar:
            target_states = [TrialState.COMPLETE, TrialState.RUNNING]
        else:
            target_states = [TrialState.COMPLETE]

        trials = study.get_trials(deepcopy=False, states=target_states)
        if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1:
            # Note: The backend HEBO implementation uses Sobol sampling here.
            # This sampler does not call `hebo.suggest()` here because
            # Optuna needs to know the search space by running the first trial.
            return {}

        # Assume that the back-end HEBO implementation aims to minimize.
        if study.direction == StudyDirection.MINIMIZE:
            worst_values = max(t.values for t in trials if t.state == TrialState.COMPLETE)
        else:
            worst_values = min(t.values for t in trials if t.state == TrialState.COMPLETE)
        sign = 1.0 if study.direction == StudyDirection.MINIMIZE else -1.0

        hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed)
        for t in trials:
            if t.state == TrialState.COMPLETE:
                hebo_params = {name: t.params[name] for name in search_space.keys()}
                hebo.observe(
                    pd.DataFrame([hebo_params]),
                    np.asarray([x * sign for x in t.values]),
                )
            elif t.state == TrialState.RUNNING:
                try:
                    hebo_params = {name: t.params[name] for name in search_space.keys()}
                except KeyError:
                    # Some params have not been suggested yet.
                    continue
                # If `constant_liar == True`, assume that the RUNNING params result in bad values,
                # thus preventing the simultaneous suggestion of (almost) the same params
                # during parallel execution. Apply `sign` so that "worst" stays consistent
                # with HEBO's minimization convention when the study maximizes.
                hebo.observe(pd.DataFrame([hebo_params]), np.asarray([x * sign for x in worst_values]))
            else:
                assert False
        params_pd = hebo.suggest()
        params = {}
        for name in search_space.keys():
            params[name] = params_pd[name].to_numpy()[0]
        return params

    def _convert_to_hebo_design_space(
        self, search_space: dict[str, BaseDistribution]
    ) -> DesignSpace:
        design_space = []
        for name, distribution in search_space.items():
            if isinstance(distribution, FloatDistribution) and not distribution.log:
                design_space.append(
                    {
                        "name": name,
                        "type": "num",
                        "lb": distribution.low,
                        "ub": distribution.high,
                    }
                )
            elif isinstance(distribution, FloatDistribution) and distribution.log:
                # HEBO's "pow" types sample on a log scale.
                design_space.append(
                    {
                        "name": name,
                        "type": "pow",
                        "lb": distribution.low,
                        "ub": distribution.high,
                    }
                )
            elif isinstance(distribution, IntDistribution) and distribution.log:
                design_space.append(
                    {
                        "name": name,
                        "type": "pow_int",
                        "lb": distribution.low,
                        "ub": distribution.high,
                    }
                )
            elif isinstance(distribution, IntDistribution) and distribution.step:
                design_space.append(
                    {
                        "name": name,
                        "type": "step_int",
                        "lb": distribution.low,
                        "ub": distribution.high,
                        "step": distribution.step,
                    }
                )
            elif isinstance(distribution, IntDistribution):
                design_space.append(
                    {
                        "name": name,
                        "type": "int",
                        "lb": distribution.low,
                        "ub": distribution.high,
                    }
                )
            elif isinstance(distribution, CategoricalDistribution):
                design_space.append(
                    {
                        "name": name,
                        "type": "cat",
                        "categories": distribution.choices,
                    }
                )
            else:
                raise NotImplementedError(f"Unsupported distribution: {distribution}")
        return DesignSpace().parse(design_space)

    def infer_relative_search_space(
        self, study: Study, trial: FrozenTrial
    ) -> dict[str, BaseDistribution]:
        return optuna.search_space.intersection_search_space(
            study._get_trials(deepcopy=False, use_cache=True)
        )

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        if not self._is_independent_sampler_specified:
            warnings.warn(
                "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?"
            )

        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )
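
As the docstring notes, ``constant_liar`` also penalizes "zombie" RUNNING trials, so pairing it with an RDBStorage heartbeat keeps crashed trials from being treated as live. A minimal sketch of that setup, assuming the module is loaded through optunahub as in `example.py` (the storage URL and interval values below are illustrative, not prescribed by this package):

```python
import optuna
import optunahub

HEBOSampler = optunahub.load_module("samplers/hebo_base_sampler").HEBOSampler

# Trials that stop sending heartbeats for `grace_period` seconds are marked FAIL,
# so the constant liar no longer penalizes their parameter configurations.
storage = optuna.storages.RDBStorage(
    url="sqlite:///hebo_example.db",
    heartbeat_interval=60,
    grace_period=120,
)
study = optuna.create_study(storage=storage, sampler=HEBOSampler(constant_liar=True))
```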