Skip to content

Commit

Permalink
Merge pull request #205 from nabenabe0928/refactor-hebo
Browse files Browse the repository at this point in the history
Refactor `HEBOSampler` based on `TPESampler`
  • Loading branch information
HideakiImamura authored Dec 13, 2024
2 parents 585c8bc + 087d11a commit 23d0c50
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 23 deletions.
29 changes: 20 additions & 9 deletions package/samplers/hebo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ license: MIT License
## Installation

```bash
pip install -r https://hub.optuna.org/samplers/hebo/requirements.txt
git clone [email protected]:huawei-noah/HEBO.git
cd HEBO/HEBO
pip install -e .
# Install the dependencies.
pip install optunahub hebo

# NOTE: The step below is optional, but pymoo must be installed after NumPy for a faster
# HEBOSampler, so we run the following command to make sure that the compiled version is installed.
pip install --upgrade pymoo
```

## APIs
Expand Down Expand Up @@ -59,11 +61,7 @@ def objective(trial: optuna.trial.Trial) -> float:


module = optunahub.load_module("samplers/hebo")
sampler = module.HEBOSampler(search_space={
"x": optuna.distributions.FloatDistribution(-10, 10),
"y": optuna.distributions.IntDistribution(-10, 10),
})
# sampler = module.HEBOSampler() # Note: `search_space` is not required, and thus it works too.
sampler = module.HEBOSampler()
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=100)

Expand All @@ -73,6 +71,19 @@ print(study.best_trial.params, study.best_trial.value)
See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo/example.py) for a full example.
![History Plot](images/hebo_optimization_history.png "History Plot")

Note that passing the search space directly to `HEBOSampler` may slightly speed up the sampling routine, since Optuna can then skip the search space inference.
For example, the instantiation of `HEBOSampler` above can be modified as follows:

```python
search_space = {
"x": optuna.distributions.FloatDistribution(-10, 10),
"y": optuna.distributions.IntDistribution(-10, 10),
}
sampler = module.HEBOSampler(search_space=search_space)
```

However, users must make sure that the search space provided this way is consistent with the search space defined in the objective function.

## Others

HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard).
Expand Down
2 changes: 1 addition & 1 deletion package/samplers/hebo/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
optuna
optunahub
hebo@git+https://github.com/huawei-noah/[email protected]#subdirectory=HEBO
hebo
26 changes: 13 additions & 13 deletions package/samplers/hebo/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

from collections.abc import Sequence
from typing import Any
import warnings

import numpy as np
import optuna
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.distributions import FloatDistribution
from optuna.distributions import IntDistribution
from optuna.logging import get_logger
from optuna.samplers import BaseSampler
from optuna.search_space import IntersectionSearchSpace
from optuna.study import Study
Expand All @@ -23,6 +23,9 @@
from hebo.optimizers.hebo import HEBO


_logger = get_logger(f"optuna.{__name__}")


class HEBOSampler(optunahub.samplers.SimpleBaseSampler):
"""A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>__` as the backend.
Expand Down Expand Up @@ -85,13 +88,12 @@ def __init__(
self._hebo = None
self._intersection_search_space = IntersectionSearchSpace()
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
self._is_independent_sample_necessary = False
self._constant_liar = constant_liar
self._rng = np.random.default_rng(seed)

def _sample_relative_define_and_run(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
return {
name: row.iloc[0]
for name, row in self._hebo.suggest().items()
Expand All @@ -100,7 +102,7 @@ def _sample_relative_define_and_run(

def _sample_relative_stateless(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if self._constant_liar:
target_states = [TrialState.COMPLETE, TrialState.RUNNING]
else:
Expand All @@ -113,10 +115,8 @@ def _sample_relative_stateless(
# note: The backend HEBO implementation uses Sobol sampling here.
# This sampler does not call `hebo.suggest()` here because
# Optuna needs to know search space by running the first trial in Define-by-Run.
self._is_independent_sample_necessary = True
return {}
else:
self._is_independent_sample_necessary = False

trials = [t for t in trials if set(search_space.keys()) <= set(t.params.keys())]

# Assume that the back-end HEBO implementation aims to minimize.
Expand All @@ -131,7 +131,7 @@ def _sample_relative_stateless(
params = pd.DataFrame([t.params for t in trials])
values[np.isnan(values)] = worst_value
values *= sign
hebo.observe(params, values)
hebo.observe(params, values[:, np.newaxis])
return {
name: row.iloc[0]
for name, row in hebo.suggest().items()
Expand All @@ -140,7 +140,7 @@ def _sample_relative_stateless(

def sample_relative(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if study._is_multi_objective():
raise ValueError(
f"{self.__class__.__name__} has not supported multi-objective optimization."
Expand Down Expand Up @@ -240,10 +240,10 @@ def sample_independent(
param_name: str,
param_distribution: BaseDistribution,
) -> Any:
if not self._is_independent_sample_necessary:
warnings.warn(
"`HEBOSampler` falls back to `RandomSampler` due to dynamic search space."
)
states = (TrialState.COMPLETE,)
trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
if any(param_name in trial.params for trial in trials):
_logger.warn(f"Use `RandomSampler` for {param_name} due to dynamic search space.")

return self._independent_sampler.sample_independent(
study, trial, param_name, param_distribution
Expand Down

0 comments on commit 23d0c50

Please sign in to comment.