Skip to content

Commit

Permalink
Merge pull request #205 from nabenabe0928/refactor-hebo
Browse files Browse the repository at this point in the history
Refactor `HEBOSampler` based on `TPESampler`
  • Loading branch information
HideakiImamura authored Dec 13, 2024
2 parents 585c8bc + 087d11a commit 23d0c50
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 23 deletions.
29 changes: 20 additions & 9 deletions package/samplers/hebo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ license: MIT License
## Installation

```bash
pip install -r https://hub.optuna.org/samplers/hebo/requirements.txt
git clone [email protected]:huawei-noah/HEBO.git
cd HEBO/HEBO
pip install -e .
# Install the dependencies.
pip install optunahub hebo

# NOTE: The step below is optional, but pymoo must be installed after NumPy for a faster
# HEBOSampler, so we run the following command to make sure that the compiled version is installed.
pip install --upgrade pymoo
```

## APIs
Expand Down Expand Up @@ -59,11 +61,7 @@ def objective(trial: optuna.trial.Trial) -> float:


module = optunahub.load_module("samplers/hebo")
sampler = module.HEBOSampler(search_space={
"x": optuna.distributions.FloatDistribution(-10, 10),
"y": optuna.distributions.IntDistribution(-10, 10),
})
# sampler = module.HEBOSampler() # Note: `search_space` is not required, and thus it works too.
sampler = module.HEBOSampler()
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=100)

Expand All @@ -73,6 +71,19 @@ print(study.best_trial.params, study.best_trial.value)
See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo/example.py) for a full example.
![History Plot](images/hebo_optimization_history.png "History Plot")

Note that passing the search space directly to `HEBOSampler` may slightly speed up the sampling routine, since Optuna can then skip the search space inference.
For example, the instantiation of `HEBOSampler` above can be modified as follows:

```python
search_space = {
"x": optuna.distributions.FloatDistribution(-10, 10),
"y": optuna.distributions.IntDistribution(-10, 10),
}
sampler = module.HEBOSampler(search_space=search_space)
```

However, users must make sure that the search space provided this way is consistent with the search space defined in the objective function.

## Others

HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard).
Expand Down
2 changes: 1 addition & 1 deletion package/samplers/hebo/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
optuna
optunahub
hebo@git+https://github.com/huawei-noah/[email protected]#subdirectory=HEBO
hebo
26 changes: 13 additions & 13 deletions package/samplers/hebo/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

from collections.abc import Sequence
from typing import Any
import warnings

import numpy as np
import optuna
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.distributions import FloatDistribution
from optuna.distributions import IntDistribution
from optuna.logging import get_logger
from optuna.samplers import BaseSampler
from optuna.search_space import IntersectionSearchSpace
from optuna.study import Study
Expand All @@ -23,6 +23,9 @@
from hebo.optimizers.hebo import HEBO


_logger = get_logger(f"optuna.{__name__}")


class HEBOSampler(optunahub.samplers.SimpleBaseSampler):
"""A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>__` as the backend.
Expand Down Expand Up @@ -85,13 +88,12 @@ def __init__(
self._hebo = None
self._intersection_search_space = IntersectionSearchSpace()
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
self._is_independent_sample_necessary = False
self._constant_liar = constant_liar
self._rng = np.random.default_rng(seed)

def _sample_relative_define_and_run(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
return {
name: row.iloc[0]
for name, row in self._hebo.suggest().items()
Expand All @@ -100,7 +102,7 @@ def _sample_relative_define_and_run(

def _sample_relative_stateless(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if self._constant_liar:
target_states = [TrialState.COMPLETE, TrialState.RUNNING]
else:
Expand All @@ -113,10 +115,8 @@ def _sample_relative_stateless(
# note: The backend HEBO implementation uses Sobol sampling here.
# This sampler does not call `hebo.suggest()` here because
# Optuna needs to know search space by running the first trial in Define-by-Run.
self._is_independent_sample_necessary = True
return {}
else:
self._is_independent_sample_necessary = False

trials = [t for t in trials if set(search_space.keys()) <= set(t.params.keys())]

# Assume that the back-end HEBO implementation aims to minimize.
Expand All @@ -131,7 +131,7 @@ def _sample_relative_stateless(
params = pd.DataFrame([t.params for t in trials])
values[np.isnan(values)] = worst_value
values *= sign
hebo.observe(params, values)
hebo.observe(params, values[:, np.newaxis])
return {
name: row.iloc[0]
for name, row in hebo.suggest().items()
Expand All @@ -140,7 +140,7 @@ def _sample_relative_stateless(

def sample_relative(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if study._is_multi_objective():
raise ValueError(
f"{self.__class__.__name__} has not supported multi-objective optimization."
Expand Down Expand Up @@ -240,10 +240,10 @@ def sample_independent(
param_name: str,
param_distribution: BaseDistribution,
) -> Any:
if not self._is_independent_sample_necessary:
warnings.warn(
"`HEBOSampler` falls back to `RandomSampler` due to dynamic search space."
)
states = (TrialState.COMPLETE,)
trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
if any(param_name in trial.params for trial in trials):
_logger.warn(f"Use `RandomSampler` for {param_name} due to dynamic search space.")

return self._independent_sampler.sample_independent(
study, trial, param_name, param_distribution
Expand Down

0 comments on commit 23d0c50

Please sign in to comment.