diff --git a/package/samplers/hebo/README.md b/package/samplers/hebo/README.md index c60ea858..11f1d1a2 100644 --- a/package/samplers/hebo/README.md +++ b/package/samplers/hebo/README.md @@ -14,10 +14,12 @@ license: MIT License ## Installation ```bash -pip install -r https://hub.optuna.org/samplers/hebo/requirements.txt -git clone git@github.com:huawei-noah/HEBO.git -cd HEBO/HEBO -pip install -e . +# Install the dependencies. +pip install optunahub hebo + +# NOTE: The step below is optional. pymoo must be installed after NumPy for a faster HEBOSampler, +# so we run the following command to make sure that the compiled version is installed. +pip install --upgrade pymoo ``` ## APIs @@ -59,11 +61,7 @@ def objective(trial: optuna.trial.Trial) -> float: module = optunahub.load_module("samplers/hebo") -sampler = module.HEBOSampler(search_space={ - "x": optuna.distributions.FloatDistribution(-10, 10), - "y": optuna.distributions.IntDistribution(-10, 10), -}) -# sampler = module.HEBOSampler() # Note: `search_space` is not required, and thus it works too. +sampler = module.HEBOSampler() study = optuna.create_study(sampler=sampler) study.optimize(objective, n_trials=100) @@ -73,6 +71,19 @@ print(study.best_trial.params, study.best_trial.value) See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo/example.py) for a full example. ![History Plot](images/hebo_optimization_history.png "History Plot") +Note that passing the search space directly to `HEBOSampler` may slightly speed up the sampling routine, since Optuna can then skip the search space inference. 
+For example, the instantiation of `HEBOSampler` above can be modified as follows: + +```python +search_space={ + "x": optuna.distributions.FloatDistribution(-10, 10), + "y": optuna.distributions.IntDistribution(-10, 10), +} +sampler = module.HEBOSampler(search_space=search_space) +``` + +However, users need to make sure that the provided search space is consistent with the search space defined in the objective function. + ## Others HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). diff --git a/package/samplers/hebo/requirements.txt b/package/samplers/hebo/requirements.txt index 2ba8ec1a..2edc8955 100644 --- a/package/samplers/hebo/requirements.txt +++ b/package/samplers/hebo/requirements.txt @@ -1,3 +1,3 @@ optuna optunahub -hebo@git+https://github.com/huawei-noah/HEBO.git@v0.3.6#subdirectory=HEBO +hebo diff --git a/package/samplers/hebo/sampler.py b/package/samplers/hebo/sampler.py index f77a2d2a..c9ddc3c9 100644 --- a/package/samplers/hebo/sampler.py +++ b/package/samplers/hebo/sampler.py @@ -2,7 +2,6 @@ from collections.abc import Sequence from typing import Any -import warnings import numpy as np import optuna @@ -10,6 +9,7 @@ from optuna.distributions import CategoricalDistribution from optuna.distributions import FloatDistribution from optuna.distributions import IntDistribution +from optuna.logging import get_logger from optuna.samplers import BaseSampler from optuna.search_space import IntersectionSearchSpace from optuna.study import Study @@ -23,6 +23,9 @@ from hebo.optimizers.hebo import HEBO +_logger = get_logger(f"optuna.{__name__}") + + class HEBOSampler(optunahub.samplers.SimpleBaseSampler): """A sampler using `HEBO __` as the backend. 
@@ -85,13 +88,12 @@ def __init__( self._hebo = None self._intersection_search_space = IntersectionSearchSpace() self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed) - self._is_independent_sample_necessary = False self._constant_liar = constant_liar self._rng = np.random.default_rng(seed) def _sample_relative_define_and_run( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] - ) -> dict[str, float]: + ) -> dict[str, Any]: return { name: row.iloc[0] for name, row in self._hebo.suggest().items() @@ -100,7 +102,7 @@ def _sample_relative_define_and_run( def _sample_relative_stateless( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] - ) -> dict[str, float]: + ) -> dict[str, Any]: if self._constant_liar: target_states = [TrialState.COMPLETE, TrialState.RUNNING] else: @@ -113,10 +115,8 @@ def _sample_relative_stateless( # note: The backend HEBO implementation uses Sobol sampling here. # This sampler does not call `hebo.suggest()` here because # Optuna needs to know search space by running the first trial in Define-by-Run. - self._is_independent_sample_necessary = True return {} - else: - self._is_independent_sample_necessary = False + trials = [t for t in trials if set(search_space.keys()) <= set(t.params.keys())] # Assume that the back-end HEBO implementation aims to minimize. 
@@ -131,7 +131,7 @@ def _sample_relative_stateless( params = pd.DataFrame([t.params for t in trials]) values[np.isnan(values)] = worst_value values *= sign - hebo.observe(params, values) + hebo.observe(params, values[:, np.newaxis]) return { name: row.iloc[0] for name, row in hebo.suggest().items() @@ -140,7 +140,7 @@ def _sample_relative_stateless( def sample_relative( self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution] - ) -> dict[str, float]: + ) -> dict[str, Any]: if study._is_multi_objective(): raise ValueError( f"{self.__class__.__name__} has not supported multi-objective optimization." @@ -240,10 +240,10 @@ def sample_independent( param_name: str, param_distribution: BaseDistribution, ) -> Any: - if not self._is_independent_sample_necessary: - warnings.warn( - "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space." - ) + states = (TrialState.COMPLETE,) + trials = study._get_trials(deepcopy=False, states=states, use_cache=True) + if any(param_name in trial.params for trial in trials): + _logger.warn(f"Use `RandomSampler` for {param_name} due to dynamic search space.") return self._independent_sampler.sample_independent( study, trial, param_name, param_distribution