From 85d7a31c97d7e9b29e8e8c9fa19cc109e2efba1c Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Thu, 7 Nov 2024 12:03:56 -0800 Subject: [PATCH] Start sampler --- package/samplers/cmamae/README.md | 110 +++++++++++++++++++ package/samplers/cmamae/example.py | 37 +++++++ package/samplers/cmamae/sampler.py | 165 +++++++++++++++++++++++++++++ 3 files changed, 312 insertions(+) create mode 100644 package/samplers/cmamae/README.md create mode 100644 package/samplers/cmamae/example.py create mode 100644 package/samplers/cmamae/sampler.py diff --git a/package/samplers/cmamae/README.md b/package/samplers/cmamae/README.md new file mode 100644 index 00000000..8724563c --- /dev/null +++ b/package/samplers/cmamae/README.md @@ -0,0 +1,110 @@ +--- +author: Bryon Tjanaka +title: Please fill in the title of the feature here. (e.g., Gaussian-Process Expected Improvement Sampler) +description: Please fill in the description of the feature here. (e.g., This sampler searches for each trial based on expected improvement using Gaussian process.) +tags: [Please fill in the list of tags here. (e.g., sampler, visualization, pruner)] +optuna_versions: ['Please fill in the list of versions of Optuna in which you have confirmed the feature works, e.g., 3.6.1.'] +license: MIT License +--- + + + +Please read the [tutorial guide](https://optuna.github.io/optunahub-registry/recipes/001_first.html) to register your feature in OptunaHub. +You can find more detailed explanation of the following contents in the tutorial. +Looking at [other packages' implementations](https://github.com/optuna/optunahub-registry/tree/main/package) will also help you. + +## Abstract + +You can provide an abstract for your package here. +This section will help attract potential users to your package. + +**Example** + +This package provides a sampler based on Gaussian process-based Bayesian optimization. 
+The sampler is highly sample-efficient, so it is suitable for computationally expensive optimization problems with a limited evaluation budget, such as hyperparameter optimization of machine learning algorithms. + +## Class or Function Names + +Please fill in the class/function names which you implement here. + +**Example** + +- GPSampler + +## Installation + +If you have additional dependencies, please fill in the installation guide here. +If no additional dependencies are required, **this section can be removed**. + +**Example** + +```shell +$ pip install scipy torch +``` + +If your package has `requirements.txt`, it will be automatically uploaded to the OptunaHub, and the package dependencies will be available to install as follows. + +```shell + pip install -r https://hub.optuna.org/{category}/{your_package_name}/requirements.txt +``` + +## Example + +Please fill in the code snippet to use the implemented feature here. + +**Example** + +```python +import optuna +import optunahub + + +def objective(trial): + x = trial.suggest_float("x", -5, 5) + return x**2 + + +sampler = optunahub.load_module(package="samplers/gp").GPSampler() +study = optuna.create_study(sampler=sampler) +study.optimize(objective, n_trials=100) +``` + +## Others + +Please add any other information here by adding child sections (###). +If there is no additional information, **this section can be removed**. + + diff --git a/package/samplers/cmamae/example.py b/package/samplers/cmamae/example.py new file mode 100644 index 00000000..7e3d45dc --- /dev/null +++ b/package/samplers/cmamae/example.py @@ -0,0 +1,37 @@ +import optuna +import optunahub + +from sampler import CmaMaeSampler + +# TODO: Replace above import with this. 
+# module = optunahub.load_module("samplers/cmamae") +# CmaMaeSampler = module.CmaMaeSampler + + +def objective(trial: optuna.trial.Trial) -> float: + x = trial.suggest_float("x", -10, 10) + y = trial.suggest_float("y", -10, 10) + return -(x**2 + y**2) + 2, x, y + + +if __name__ == "__main__": + sampler = CmaMaeSampler( + param_names=["x", "y"], + archive_dims=[20, 20], + archive_ranges=[(-10, 10), (-10, 10)], + archive_learning_rate=0.1, + archive_threshold_min=-10, + n_emitters=15, + emitter_x0={ + "x": 5, + "y": 5 + }, + emitter_sigma0=0.1, + emitter_batch_size=36, + ) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=100) + print(study.best_trial.params) + + fig = optuna.visualization.plot_optimization_history(study) + fig.write_image("cmamae_optimization_history.png") diff --git a/package/samplers/cmamae/sampler.py b/package/samplers/cmamae/sampler.py new file mode 100644 index 00000000..f780e5b5 --- /dev/null +++ b/package/samplers/cmamae/sampler.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +from collections.abc import Sequence + +import numpy as np +import optunahub +from optuna.distributions import BaseDistribution +from optuna.study import Study +from optuna.trial import FrozenTrial, TrialState +from ribs.archives import GridArchive +from ribs.emitters import EvolutionStrategyEmitter +from ribs.schedulers import Scheduler + +SimpleBaseSampler = optunahub.load_module("samplers/simple").SimpleBaseSampler + + +class CmaMaeSampler(SimpleBaseSampler): + """A sampler using CMA-MAE as implemented in pyribs. + + `CMA-MAE <https://arxiv.org/abs/2205.10752>`_ is a quality diversity + algorithm that has demonstrated state-of-the-art performance in a variety of + domains. `pyribs <https://pyribs.org>`_ is a bare-bones Python library for + quality diversity optimization algorithms. For a primer on CMA-MAE and + pyribs, we recommend referring to the series of `pyribs tutorials + <https://docs.pyribs.org/en/stable/tutorials.html>`_. 
+ + For simplicity, this implementation provides a default instantiation of + CMA-MAE with a `GridArchive + <https://docs.pyribs.org/en/stable/api/ribs.archives.GridArchive.html>`_ and + `EvolutionStrategyEmitter + <https://docs.pyribs.org/en/stable/api/ribs.emitters.EvolutionStrategyEmitter.html>`_ + with improvement ranking, all wrapped up in a `Scheduler + <https://docs.pyribs.org/en/stable/api/ribs.schedulers.Scheduler.html>`_. + + Args: + param_names: List of names of parameters to optimize. + archive_dims: Number of archive cells in each dimension of the measure + space, e.g. ``[20, 30, 40]`` indicates there should be 3 dimensions + with 20, 30, and 40 cells. (The number of dimensions is implicitly + defined by the length of this argument.) + archive_ranges: Lower and upper bound of each dimension of the measure + space for the archive, e.g. ``[(-1, 1), (-2, 2)]`` indicates the + first dimension should have bounds :math:`[-1,1]` (inclusive), and + the second dimension should have bounds :math:`[-2,2]` (inclusive). + ``archive_ranges`` should be the same length as ``archive_dims``. + archive_learning_rate: The learning rate for threshold updates in the + archive. + archive_threshold_min: The initial threshold value for all the cells in + the archive. + n_emitters: Number of emitters to use in CMA-MAE. + emitter_x0: Mapping from each name in ``param_names`` to its initial value. + emitter_sigma0: Initial step size / standard deviation of the + distribution from which solutions are sampled in the emitter. + emitter_batch_size: Number of solutions for each emitter to generate on + each iteration. 
+ """ + + def __init__( + self, + *, + param_names: list[str], + archive_dims: list[int], + archive_ranges: list[tuple[float, float]], + archive_learning_rate: float, + archive_threshold_min: float, + n_emitters: int, + emitter_x0: dict[str, float], + emitter_sigma0: float, + emitter_batch_size: int, + ) -> None: + super().__init__() + + self._validate_params(param_names, emitter_x0) + self._param_names = param_names[:] + + emitter_x0_np = self._convert_to_pyribs_params(emitter_x0) + + archive = GridArchive( + solution_dim=len(param_names), + dims=archive_dims, + ranges=archive_ranges, + learning_rate=archive_learning_rate, + threshold_min=archive_threshold_min, + ) + result_archive = GridArchive( + solution_dim=len(param_names), + dims=archive_dims, + ranges=archive_ranges, + ) + emitters = [ + EvolutionStrategyEmitter( + archive, + x0=emitter_x0_np, + sigma0=emitter_sigma0, + ranker="imp", + selection_rule="mu", + restart_rule="basic", + batch_size=emitter_batch_size, + ) for _ in range(n_emitters) + ] + + # Number of solutions generated in each batch from pyribs. + self._batch_size = n_emitters * emitter_batch_size + + self._scheduler = Scheduler( + archive, + emitters, + result_archive=result_archive, + ) + + def _validate_params(self, param_names: list[str], + emitter_x0: dict[str, float]) -> None: + dim = len(param_names) + param_set = set(param_names) + if dim != len(param_set): + raise ValueError( + "Some elements in param_names are duplicated. Please make it a unique list." + ) + + if set(param_names) != emitter_x0.keys(): + raise ValueError( + "emitter_x0 does not contain the parameters listed in param_names. 
" + "Please provide an initial value for each parameter.") + + def _convert_to_pyribs_params(self, params: dict[str, float]) -> np.ndarray: + np_params = np.empty(len(self._param_names), dtype=float) + for i, p in enumerate(self._param_names): + np_params[i] = params[p] + return np_params + + def _convert_to_optuna_params(self, params: np.ndarray) -> dict[str, float]: + dict_params = {} + for i, p in enumerate(self._param_names): + dict_params[p] = params[i] + return dict_params + + def sample_relative( + self, study: Study, trial: FrozenTrial, + search_space: dict[str, BaseDistribution]) -> dict[str, float]: + # Note: Batch optimization means we need to enqueue trials. + # https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.enqueue_trial + if trial.number % self._batch_size == 0: + sols = self._scheduler.ask() + for sol in sols: + params = self._convert_to_optuna_params(sol) + study.enqueue_trial(params) + + # Probably, this trial is taken from the queue, so we do not have to take it? + # but I need to look into it. + return trial + + def after_trial( + self, + study: Study, + trial: FrozenTrial, + state: TrialState, + values: Sequence[float] | None, + ) -> None: + # TODO + if trial.number % self._batch_size == self._batch_size - 1: + results = [ + t.values[trial.number - self._batch_size + 1:trial.number + 1] + for t in study.trials + ] + scheduler.tell