Commit

Update sampler.py
eukaryo authored Dec 4, 2024
1 parent 3e7e132 commit fca0635
Showing 1 changed file with 76 additions and 5 deletions.
81 changes: 76 additions & 5 deletions package/samplers/hebo_base_sampler/sampler.py
@@ -1,5 +1,8 @@
from __future__ import annotations

from typing import Any
import warnings

from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO
import numpy as np
@@ -18,6 +21,49 @@


class HEBOSampler(BaseSampler): # type: ignore
"""A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>__` as the backend.
For further information about HEBO algorithm, please refer to the following papers:
- `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021).<https://arxiv.org/abs/2012.03826>__`
Args:
seed:
A seed for ``HEBOSampler``. Default is :obj:`None`.
constant_liar:
If :obj:`True`, penalize running trials to avoid suggesting parameter configurations
nearby.
.. note::
Abnormally terminated trials often leave behind a record with a state of
``RUNNING`` in the storage.
Such "zombie" trial parameters will be avoided by the constant liar algorithm
during subsequent sampling.
When using an :class:`~optuna.storages.RDBStorage`, it is possible to enable the
``heartbeat_interval`` to change the records for abnormally terminated trials to
``FAIL``.
(This note is quoted from `TPESampler <https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L215-L222>__`.)
.. note::
It is recommended to set this value to :obj:`True` during distributed
optimization to avoid having multiple workers evaluating similar parameter
configurations. In particular, if each objective function evaluation is costly
and the durations of the running states are significant, and/or the number of
workers is high.
(This note is quoted from `TPESampler <https://github.com/optuna/optuna/blob/v4.1.0/optuna/samplers/_tpe/sampler.py#L224-L229>__`.)
.. note::
HEBO algorithm involves multi-objective optimization of multiple acquisition functions.
While `constant_liar` is a simple way to get diverse params for parallel optimization,
it may not be the best approach for HEBO.
independent_sampler:
A :class:`~optuna.samplers.BaseSampler` instance that is used for independent
sampling. The parameters not contained in the relative search space are sampled
by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler`
is used as the default.
""" # NOQA
    def __init__(
        self,
        seed: int | None = None,
@@ -27,6 +73,7 @@ def __init__(
        self._seed = seed
        self._intersection_search_space = IntersectionSearchSpace()
        self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
        self._is_independent_sampler_specified = independent_sampler is not None
        self._constant_liar = constant_liar

    def sample_relative(
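As context for the new constructor options above (not part of the commit): a minimal usage sketch, assuming the OptunaHub package name "samplers/hebo_base_sampler" taken from the file path of this diff, with optunahub and HEBO installed.

    import optuna
    import optunahub

    def objective(trial: optuna.Trial) -> float:
        x = trial.suggest_float("x", -10.0, 10.0)
        return x**2

    # load_module fetches the package from the optunahub-registry (needs network access).
    module = optunahub.load_module(package="samplers/hebo_base_sampler")
    # constant_liar=True penalizes RUNNING trials so parallel workers do not
    # receive (almost) identical suggestions; seed feeds the new scramble_seed
    # argument introduced further down in this commit.
    sampler = module.HEBOSampler(seed=42, constant_liar=True)
    study = optuna.create_study(sampler=sampler, direction="minimize")
    study.optimize(objective, n_trials=20)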
@@ -35,12 +82,18 @@ def sample_relative(
        trial: FrozenTrial,
        search_space: dict[str, BaseDistribution],
    ) -> dict[str, float]:
        if study._is_multi_objective():
            raise ValueError("This function does not support multi-objective optimization study.")
        if self._constant_liar:
            target_states = [TrialState.COMPLETE, TrialState.RUNNING]
        else:
            target_states = [TrialState.COMPLETE]

        trials = study.get_trials(deepcopy=False, states=target_states)
        if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1:
            # Note: the backend HEBO implementation uses Sobol sampling here.
            # This sampler does not call `hebo.suggest()` here because
            # Optuna needs to know the search space by running the first trial.
            return {}

        # Assume that the back-end HEBO implementation aims to minimize.
@@ -50,10 +103,15 @@
         worst_values = min(t.values for t in trials if t.state == TrialState.COMPLETE)
         sign = 1.0 if study.direction == StudyDirection.MINIMIZE else -1.0

-        hebo = HEBO(self._convert_to_hebo_design_space(search_space))
+        hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed)
         df_params = pd.DataFrame([t.params for t in trials])
-        values_array = np.asarray([t.values * sign if t.state == TrialState.COMPLETE else worst_values for t in trials])
-        hebo.observe(df_params, values_array)
+        # If `constant_liar == True`, assume that the RUNNING params result in bad values,
+        # thus preventing the simultaneous suggestion of (almost) the same params
+        # during parallel execution.
+        values_array = np.asarray(
+            [t.values * sign if t.state == TrialState.COMPLETE else worst_values for t in trials]
+        )
+        hebo.observe(df_params, values_array)
         params_pd = hebo.suggest()
         params = {}
         for name in search_space.keys():
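To make the observe step above concrete, here is a self-contained sketch (not part of the commit) of how the observation matrix is assembled when constant_liar is enabled; the records below are hypothetical stand-ins for Optuna's FrozenTrial objects.

    import numpy as np
    import pandas as pd

    # Two COMPLETE trials (params, values) and one RUNNING trial (params only).
    completed = [({"x": 1.0}, [1.0]), ({"x": -2.0}, [4.0])]
    running_params = [{"x": 0.5}]

    # HEBO is assumed to minimize, so a maximization study would flip the sign.
    sign = 1.0  # StudyDirection.MINIMIZE

    # The commit fills RUNNING trials with a reference value taken from the
    # COMPLETE trials (min(t.values) in this commit), so HEBO also "observes"
    # the in-flight params and steers new suggestions away from them.
    worst_values = min(values for _, values in completed)

    df_params = pd.DataFrame([p for p, _ in completed] + running_params)
    values_array = np.asarray(
        [[v * sign for v in values] for _, values in completed]
        + [worst_values] * len(running_params)
    )
    # hebo.observe(df_params, values_array) would then register all three rows.
    print(df_params)
    print(values_array)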
@@ -123,11 +181,24 @@ def _convert_to_hebo_design_space(
             raise NotImplementedError(f"Unsupported distribution: {distribution}")
         return DesignSpace().parse(design_space)

-    def infer_relative_search_space(self, study, trial):  # type: ignore
+    def infer_relative_search_space(
+        self, study: Study, trial: FrozenTrial
+    ) -> dict[str, BaseDistribution]:
         return optuna.search_space.intersection_search_space(study._get_trials(deepcopy=False, use_cache=True))

+    def sample_independent(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        param_name: str,
+        param_distribution: BaseDistribution,
+    ) -> Any:
+        if not self._is_independent_sampler_specified:
+            warnings.warn(
+                "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?"
+            )
+
-    def sample_independent(self, study, trial, param_name, param_distribution):  # type: ignore
         return self._independent_sampler.sample_independent(
             study, trial, param_name, param_distribution
         )
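The new warning fires only when the user did not pass their own independent_sampler. A sketch (not part of the commit) of a dynamic search space that triggers this fallback: "c" appears only in some trials, so it never enters the intersection search space and must be sampled independently.

    import optuna
    import optunahub

    def objective(trial: optuna.Trial) -> float:
        classifier = trial.suggest_categorical("classifier", ["svm", "tree"])
        if classifier == "svm":
            # Conditional parameter: sampled via sample_independent, which
            # now warns about the RandomSampler fallback.
            return trial.suggest_float("c", 1e-3, 1e3, log=True)
        return float(trial.suggest_int("max_depth", 2, 32))

    module = optunahub.load_module(package="samplers/hebo_base_sampler")
    study = optuna.create_study(sampler=module.HEBOSampler(seed=0))
    study.optimize(objective, n_trials=10)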
