diff --git a/package/samplers/mab_epsilon_greedy/LICENSE b/package/samplers/mab_epsilon_greedy/LICENSE
new file mode 100644
index 00000000..cacc4256
--- /dev/null
+++ b/package/samplers/mab_epsilon_greedy/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package/samplers/mab_epsilon_greedy/README.md b/package/samplers/mab_epsilon_greedy/README.md
new file mode 100644
index 00000000..78831dab
--- /dev/null
+++ b/package/samplers/mab_epsilon_greedy/README.md
@@ -0,0 +1,27 @@
+---
+author: Ryota Nishijima
+title: MAB Epsilon-Greedy Sampler
+description: Sampler based on a multi-armed bandit algorithm with epsilon-greedy arm selection.
+tags: [sampler, multi-armed bandit]
+optuna_versions: [4.0.0]
+license: MIT License
+---
+
+## Class or Function Names
+
+- MABEpsilonGreedySampler
+
+## Example
+
+```python
+import optunahub
+
+mod = optunahub.load_module("samplers/mab_epsilon_greedy")
+sampler = mod.MABEpsilonGreedySampler()
+```
+
+See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/mab_epsilon_greedy/example.py) for more details.
+
+## Others
+
+This package provides a sampler based on a multi-armed bandit algorithm with epsilon-greedy arm selection.
diff --git a/package/samplers/mab_epsilon_greedy/__init__.py b/package/samplers/mab_epsilon_greedy/__init__.py
new file mode 100644
index 00000000..5f7ee5ef
--- /dev/null
+++ b/package/samplers/mab_epsilon_greedy/__init__.py
@@ -0,0 +1,4 @@
+from .mab_epsilon_greedy import MABEpsilonGreedySampler
+
+
+__all__ = ["MABEpsilonGreedySampler"]
diff --git a/package/samplers/mab_epsilon_greedy/example.py b/package/samplers/mab_epsilon_greedy/example.py
new file mode 100644
index 00000000..67298e51
--- /dev/null
+++ b/package/samplers/mab_epsilon_greedy/example.py
@@ -0,0 +1,20 @@
+import optuna
+import optunahub
+
+
+if __name__ == "__main__":
+    module = optunahub.load_module(
+        package="samplers/mab_epsilon_greedy",
+    )
+    sampler = module.MABEpsilonGreedySampler()
+
+    def objective(trial: optuna.Trial) -> float:
+        x = trial.suggest_categorical("arm_1", [1, 2, 3])
+        y = trial.suggest_categorical("arm_2", [1, 2])
+
+        return x + y
+
+    study = optuna.create_study(sampler=sampler)
+    study.optimize(objective, n_trials=20)
+
+    print(study.best_trial.value, study.best_trial.params)
diff --git a/package/samplers/mab_epsilon_greedy/mab_epsilon_greedy.py b/package/samplers/mab_epsilon_greedy/mab_epsilon_greedy.py
new file mode 100644
index 00000000..b43ef1ea
--- /dev/null
+++ b/package/samplers/mab_epsilon_greedy/mab_epsilon_greedy.py
@@ -0,0 +1,79 @@
+from collections import defaultdict
+from typing import Any
+from typing import Optional
+
+from optuna.distributions import BaseDistribution
+from optuna.samplers import RandomSampler
+from optuna.study import Study
+from optuna.study._study_direction import StudyDirection
+from optuna.trial import FrozenTrial
+from optuna.trial import TrialState
+
+
+class MABEpsilonGreedySampler(RandomSampler):
+    """Sampler based on the multi-armed bandit algorithm with epsilon-greedy arm selection.
+
+    Each categorical choice is treated as an arm, so this sampler is intended
+    for categorical search spaces.
+
+    Args:
+        epsilon (float):
+            Parameter for the epsilon-greedy algorithm: the probability of
+            selecting an arm uniformly at random (exploration).
+        seed (int | None):
+            Seed for the random number generator used in arm selection.
+
+    """
+
+    def __init__(
+        self,
+        epsilon: float = 0.7,
+        seed: Optional[int] = None,
+    ) -> None:
+        super().__init__(seed)
+        self._epsilon = epsilon
+
+    def sample_independent(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        param_name: str,
+        param_distribution: BaseDistribution,
+    ) -> Any:
+        states = (TrialState.COMPLETE, TrialState.PRUNED)
+        trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
+
+        # Accumulate the total reward and the selection count for each arm.
+        rewards_by_choice: defaultdict = defaultdict(float)
+        cnt_by_choice: defaultdict = defaultdict(int)
+        for t in trials:
+            # Skip trials that did not sample this parameter or have no value yet.
+            if param_name not in t.params or t.value is None:
+                continue
+            rewards_by_choice[t.params[param_name]] += t.value
+            cnt_by_choice[t.params[param_name]] += 1
+
+        # Play each never-selected arm once first, like the UCB1 algorithm's initialization.
+        # ref. https://github.com/optuna/optunahub-registry/pull/155#discussion_r1780446062
+        never_selected = [
+            arm for arm in param_distribution.choices if arm not in rewards_by_choice
+        ]
+        if never_selected:
+            return self._rng.rng.choice(never_selected)
+
+        # Once every arm has been tried, select an arm by epsilon-greedy:
+        # explore uniformly at random with probability epsilon, otherwise
+        # exploit the arm with the best mean reward observed so far.
+        if self._rng.rng.rand() < self._epsilon:
+            return self._rng.rng.choice(param_distribution.choices)
+        else:
+            if study.direction == StudyDirection.MINIMIZE:
+                return min(
+                    param_distribution.choices,
+                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
+                )
+            else:
+                return max(
+                    param_distribution.choices,
+                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
+                )
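As a usage note beyond `example.py`: the sketch below passes a non-default `epsilon` and a fixed `seed` to the constructor added in this diff. The specific values (`epsilon=0.1`, `seed=42`) are arbitrary choices for illustration, not recommendations.

```python
import optuna
import optunahub


def objective(trial: optuna.Trial) -> float:
    # Each categorical parameter is treated as an independent bandit.
    arm = trial.suggest_categorical("arm", [0.1, 0.5, 1.0])
    return arm


module = optunahub.load_module(package="samplers/mab_epsilon_greedy")
# epsilon=0.1 explores 10% of the time; seed fixes the arm selection.
sampler = module.MABEpsilonGreedySampler(epsilon=0.1, seed=42)

study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=30)
print(study.best_params)
```

Note that the default `epsilon=0.7` makes the sampler explore 70% of the time, which is heavy on exploration by epsilon-greedy conventions; lowering it shifts the budget toward exploiting the best arm found so far.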
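For reference, the selection rule in `sample_independent` can be restated in isolation. This is a hypothetical, self-contained sketch of the same epsilon-greedy logic; `select_arm`, `rewards`, and `counts` are illustrative names, not part of the package.

```python
import random
from collections import defaultdict
from typing import Any, Optional


def select_arm(
    choices: list,
    rewards: "defaultdict[Any, float]",  # total reward observed per arm
    counts: "defaultdict[Any, int]",  # number of times each arm was played
    epsilon: float,
    minimize: bool = True,
    rng: Optional[random.Random] = None,
) -> Any:
    rng = rng or random.Random()
    # Initialization: play every arm once before exploiting (UCB1-style).
    never_selected = [arm for arm in choices if arm not in counts]
    if never_selected:
        return rng.choice(never_selected)
    # Exploration: with probability epsilon, pick an arm uniformly at random.
    if rng.random() < epsilon:
        return rng.choice(choices)
    # Exploitation: otherwise pick the arm with the best mean reward so far.
    best = min if minimize else max
    return best(choices, key=lambda arm: rewards[arm] / counts[arm])
```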