From fca0635668996a5cce04a276a22de41e783590ee Mon Sep 17 00:00:00 2001
From: Hiroki Takizawa
Date: Wed, 4 Dec 2024 11:28:29 +0900
Subject: [PATCH] Update sampler.py

---
 package/samplers/hebo_base_sampler/sampler.py | 81 +++++++++++++++++--
 1 file changed, 76 insertions(+), 5 deletions(-)

diff --git a/package/samplers/hebo_base_sampler/sampler.py b/package/samplers/hebo_base_sampler/sampler.py
index 56d09b02..c415261d 100644
--- a/package/samplers/hebo_base_sampler/sampler.py
+++ b/package/samplers/hebo_base_sampler/sampler.py
@@ -1,5 +1,8 @@
 from __future__ import annotations
 
+from typing import Any
+import warnings
+
 from hebo.design_space.design_space import DesignSpace
 from hebo.optimizers.hebo import HEBO
 import numpy as np
@@ -18,6 +21,49 @@
 
 
 class HEBOSampler(BaseSampler):  # type: ignore
+    """A sampler using `HEBO <https://github.com/huawei-noah/HEBO>`__ as the backend.
+
+    For further information about the HEBO algorithm, please refer to the following paper:
+    - `Cowen-Rivers, Alexander I., et al. An Empirical Study of Assumptions in Bayesian Optimisation. arXiv preprint arXiv:2012.03826 (2021). <https://arxiv.org/abs/2012.03826>`__
+
+    Args:
+        seed:
+            A seed for ``HEBOSampler``. Default is :obj:`None`.
+
+        constant_liar:
+            If :obj:`True`, penalize running trials to avoid suggesting nearby parameter
+            configurations.
+
+            .. note::
+                Abnormally terminated trials often leave behind a record with a state of
+                ``RUNNING`` in the storage.
+                Such "zombie" trial parameters will be avoided by the constant liar algorithm
+                during subsequent sampling.
+                When using an :class:`~optuna.storages.RDBStorage`, it is possible to enable the
+                ``heartbeat_interval`` to change the records for abnormally terminated trials to
+                ``FAIL``.
+                (This note is quoted from `TPESampler <https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html>`__.)
+
+            .. note::
+                It is recommended to set this value to :obj:`True` during distributed
+                optimization to avoid having multiple workers evaluating similar parameter
+                configurations. In particular, if each objective function evaluation is costly
+                and the durations of the running states are significant, and/or the number of
+                workers is high.
+                (This note is quoted from `TPESampler <https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html>`__.)
+
+            .. note::
+                The HEBO algorithm involves multi-objective optimization of multiple acquisition functions.
+                While ``constant_liar`` is a simple way to get diverse params for parallel optimization,
+                it may not be the best approach for HEBO.
+
+        independent_sampler:
+            A :class:`~optuna.samplers.BaseSampler` instance that is used for independent
+            sampling. The parameters not contained in the relative search space are sampled
+            by this sampler. If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler`
+            is used as the default.
+
+    """  # NOQA
+
     def __init__(
         self,
         seed: int | None = None,
@@ -27,6 +73,7 @@ def __init__(
         self._seed = seed
         self._intersection_search_space = IntersectionSearchSpace()
         self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
+        self._is_independent_sampler_specified = independent_sampler is not None
         self._constant_liar = constant_liar
 
     def sample_relative(
@@ -35,12 +82,18 @@ def sample_relative(
         trial: FrozenTrial,
         search_space: dict[str, BaseDistribution],
     ) -> dict[str, float]:
+        if study._is_multi_objective():
+            raise ValueError("This function does not support multi-objective optimization study.")
         if self._constant_liar:
             target_states = [TrialState.COMPLETE, TrialState.RUNNING]
         else:
             target_states = [TrialState.COMPLETE]
+
         trials = study.get_trials(deepcopy=False, states=target_states)
         if len([t for t in trials if t.state == TrialState.COMPLETE]) < 1:
+            # note: The backend HEBO implementation uses Sobol sampling here.
+            # This sampler does not call `hebo.suggest()` here because
+            # Optuna needs to know the search space by running the first trial.
             return {}
 
         # Assume that the back-end HEBO implementation aims to minimize.
@@ -50,10 +103,15 @@
             worst_values = min(t.values for t in trials if t.state == TrialState.COMPLETE)
         sign = 1.0 if study.direction == StudyDirection.MINIMIZE else -1.0
 
-        hebo = HEBO(self._convert_to_hebo_design_space(search_space))
+        hebo = HEBO(self._convert_to_hebo_design_space(search_space), scramble_seed=self._seed)
         df_params = pd.DataFrame([t.params for t in trials])
-        values_array = np.asarray([t.values * sign if t.state == TrialState.COMPLETE else worst_values for t in trials])
-        hebo.observe(df_params, values_array)
+        # If `constant_liar == True`, assume that the RUNNING params result in bad values,
+        # thus preventing the simultaneous suggestion of (almost) the same params
+        # during parallel execution.
+        values_array = np.asarray(
+            [t.values * sign if t.state == TrialState.COMPLETE else worst_values for t in trials]
+        )
+        hebo.observe(df_params, values_array)
         params_pd = hebo.suggest()
         params = {}
         for name in search_space.keys():
@@ -123,11 +181,24 @@
                 raise NotImplementedError(f"Unsupported distribution: {distribution}")
         return DesignSpace().parse(design_space)
 
-    def infer_relative_search_space(self, study, trial):  # type: ignore
+    def infer_relative_search_space(
+        self, study: Study, trial: FrozenTrial
+    ) -> dict[str, BaseDistribution]:
         return optuna.search_space.intersection_search_space(study._get_trials(deepcopy=False, use_cache=True))
 
+    def sample_independent(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        param_name: str,
+        param_distribution: BaseDistribution,
+    ) -> Any:
+
+        if not self._is_independent_sampler_specified:
+            warnings.warn(
+                "`HEBOSampler` falls back to `RandomSampler` due to dynamic search space. Is this intended?"
+            )
-    def sample_independent(self, study, trial, param_name, param_distribution):  # type: ignore
         return self._independent_sampler.sample_independent(
             study, trial, param_name, param_distribution
         )