-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #160 from nabenabe0928/add-user-prior-cma-es
Add CMA-ES with user prior
- Loading branch information
Showing
4 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2024 Shuhei Watanabe | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
--- | ||
author: Shuhei Watanabe | ||
title: CMA-ES with User Prior | ||
description: You can provide the initial parameters, i.e. mean vector and covariance matrix, for CMA-ES with this sampler. | ||
tags: [sampler, cma-es, meta-learning, prior] | ||
optuna_versions: [4.0.0] | ||
license: MIT License | ||
--- | ||
|
||
## Abstract | ||
|
||
As the Optuna CMA-ES sampler does not support any flexible way to initialize the parameters of the Gaussian distribution, I created this sampler as a workaround. | ||
|
||
## Class or Function Names | ||
|
||
- UserPriorCmaEsSampler | ||
|
||
In principle, most arguments follow [`optuna.samplers.CmaEsSampler`](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html), but some parts are modified. | ||
|
||
For example, `UserPriorCmaEsSampler` does not support `source_trials` and `use_separable_cma` due to their incompatibility. | ||
Instead, we replaced `x0` and `sigma0` in `CmaEsSampler` with `mu0` and `cov0`. | ||
In `CmaEsSampler`, we needed to provide `x0` as `dict` and `sigma0` only as `float`. | ||
By adding `param_names` to the requirement, we can now give `mu0` (previously `x0`) and `cov0` (previously `sigma0`) as `np.ndarray`. | ||
Note that the order of each dimension in `mu0` and `cov0` must be consistent with that in `param_names`. | ||
|
||
## Installation | ||
|
||
```shell | ||
$ pip install optunahub cmaes | ||
``` | ||
|
||
## Example | ||
|
||
The simplest code example is as follows: | ||
|
||
```python | ||
import numpy as np | ||
import optuna | ||
import optunahub | ||
|
||
|
||
def objective(trial: optuna.Trial) -> float: | ||
x = trial.suggest_float("x", -50, -40) | ||
y = trial.suggest_int("y", -5, 5) | ||
return (x + 43)**2 + (y - 2)**2 | ||
|
||
|
||
if __name__ == "__main__": | ||
module = optunahub.load_module(package="samplers/user_prior_cmaes") | ||
# ``with_margin=True`` because the search space has an integer parameter. | ||
sampler = module.UserPriorCmaEsSampler( | ||
param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]), with_margin=True | ||
) | ||
study = optuna.create_study(sampler=sampler) | ||
study.optimize(objective, n_trials=20) | ||
print(study.best_trial.value, study.best_trial.params) | ||
|
||
``` | ||
|
||
Although `UserPriorCmaEsSampler` cannot handle log-scale parameters on the sampler side, the following workaround achieves the same effect: | ||
|
||
```python | ||
import math | ||
|
||
import numpy as np | ||
import optuna | ||
import optunahub | ||
|
||
|
||
def objective(trial: optuna.Trial) -> float: | ||
# For example, trial.suggest_float("x", 1e-5, 1.0, log=True) can be encoded as: | ||
x = 10 ** trial.suggest_float("log10_x", -5, 0) | ||
# trial.suggest_float("y", 2, 1024, log=True) can be encoded as: | ||
y = 2 ** trial.suggest_float("log2_y", 1, 10) | ||
# In general, trial.suggest_float("z", low, high, log=True) can be encoded as: | ||
low, high = 3, 81 | ||
b = 3 # The base of log can be any positive number. | ||
z = b ** trial.suggest_float("logb_z", math.log(low, b), math.log(high, b)) | ||
return x**2 + y**2 + z**2 | ||
|
||
|
||
if __name__ == "__main__": | ||
module = optunahub.load_module(package="samplers/user_prior_cmaes") | ||
sampler = module.UserPriorCmaEsSampler( | ||
param_names=["log10_x", "log2_y", "logb_z"], | ||
mu0=np.array([-4, 8, 3]), | ||
cov0=np.diag([0.2, 1., 0.1]), | ||
) | ||
study = optuna.create_study(sampler=sampler) | ||
study.optimize(objective, n_trials=20) | ||
print(study.best_trial.value, study.best_trial.params) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from .sampler import UserPriorCmaEsSampler | ||
|
||
|
||
__all__ = ["UserPriorCmaEsSampler"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
from __future__ import annotations | ||
|
||
import math | ||
from typing import Any | ||
from typing import Union | ||
|
||
import cmaes | ||
import numpy as np | ||
from optuna import Study | ||
from optuna._transform import _SearchSpaceTransform | ||
from optuna.distributions import BaseDistribution | ||
from optuna.distributions import FloatDistribution | ||
from optuna.distributions import IntDistribution | ||
from optuna.samplers import BaseSampler | ||
from optuna.samplers import CmaEsSampler | ||
from optuna.study import StudyDirection | ||
from optuna.trial import FrozenTrial | ||
|
||
|
||
# Union of the ``cmaes`` optimizer classes; return annotation of ``_init_optimizer``.
CmaClass = Union[cmaes.CMA, cmaes.SepCMA, cmaes.CMAwM]


class UserPriorCmaEsSampler(CmaEsSampler):
    """A CMA-ES sampler whose initial Gaussian is given by the user.

    This sampler uses `cmaes <https://github.com/CyberAgentAILab/cmaes>`__ as
    the backend. It replaces the arguments ``x0`` and ``sigma0`` of Optuna's
    ``CmaEsSampler`` with ``mu0`` and ``cov0``. ``source_trials`` and
    ``use_separable_cma`` are not supported: they are always passed to the
    parent class as ``None`` and ``False``, respectively. All other arguments
    are forwarded unchanged to ``CmaEsSampler``; please check its
    documentation for their details.

    Args:
        param_names:
            The list of the parameter names to be tuned. This list must be a
            unique list.
        mu0:
            The mean vector used for the initialization of CMA-ES, given in
            the original scale of the parameters. Its order must follow
            ``param_names``.
        cov0:
            The covariance matrix used for the initialization of CMA-ES,
            given in the original scale of the parameters. Its row and column
            order must follow ``param_names``.

    Raises:
        ValueError:
            If ``param_names`` has duplicates, or the shape of ``mu0`` or
            ``cov0`` is inconsistent with ``param_names``, or ``cov0`` fails
            the checks in ``_validate_user_prior``.
    """

    def __init__(
        self,
        param_names: list[str],
        mu0: np.ndarray,
        cov0: np.ndarray,
        n_startup_trials: int = 1,
        independent_sampler: BaseSampler | None = None,
        warn_independent_sampling: bool = True,
        seed: int | None = None,
        *,
        consider_pruned_trials: bool = False,
        restart_strategy: str | None = None,
        popsize: int | None = None,
        inc_popsize: int = 2,
        with_margin: bool = False,
        lr_adapt: bool = False,
    ) -> None:
        super().__init__(
            x0=None,
            sigma0=None,
            n_startup_trials=n_startup_trials,
            independent_sampler=independent_sampler,
            warn_independent_sampling=warn_independent_sampling,
            seed=seed,
            consider_pruned_trials=consider_pruned_trials,
            restart_strategy=restart_strategy,
            popsize=popsize,
            inc_popsize=inc_popsize,
            use_separable_cma=False,
            with_margin=with_margin,
            lr_adapt=lr_adapt,
            source_trials=None,
        )
        # ``np.array`` converts and copies in one step, so array-likes such as lists are
        # accepted and later changes to the caller's arrays do not affect this sampler.
        mu0 = np.array(mu0, dtype=float)
        cov0 = np.array(cov0, dtype=float)
        self._validate_user_prior(param_names, mu0, cov0)
        self._param_names = list(param_names)
        self._mu0 = mu0
        self._cov0 = cov0

    def _validate_user_prior(
        self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray
    ) -> None:
        """Check that ``param_names``, ``mu0`` and ``cov0`` are mutually consistent.

        Args:
            param_names: The names of the parameters to be tuned.
            mu0: The user-provided mean vector.
            cov0: The user-provided covariance matrix.

        Raises:
            ValueError:
                If ``param_names`` has duplicates, the shapes do not match
                ``len(param_names)``, or ``cov0`` is not symmetric, has a
                negative element, or has a negative eigenvalue.
        """
        dim = len(param_names)
        if dim != len(set(param_names)):
            raise ValueError(
                "Some elements in param_names are duplicated. Please make it a unique list."
            )
        if mu0.shape != (dim,) or cov0.shape != (dim, dim):
            raise ValueError(
                f"The shape of mu0 and cov0 must be (len(param_names)={dim}, ) and "
                f"(len(param_names)={dim}, len(param_names)={dim}), but got {mu0.shape} and "
                f"{cov0.shape}."
            )
        if not np.allclose(cov0, cov0.T):
            raise ValueError("cov0 must be a symmetric matrix.")
        # NOTE(review): this check also rejects negative off-diagonal entries, i.e. negatively
        # correlated priors. It is kept as-is for backward compatibility.
        if np.any(cov0 < 0.0):
            raise ValueError("All elements in cov0 must be non-negative.")

        # ``eigvalsh`` is the solver for symmetric matrices and always returns real
        # eigenvalues. The tolerance absorbs rounding errors, which can make a zero eigenvalue
        # of a singular matrix come out as a tiny negative number.
        eigenvalues = np.linalg.eigvalsh(cov0)
        tolerance = 1e-10 * np.abs(eigenvalues).max(initial=1.0)
        if np.any(eigenvalues < -tolerance):
            raise ValueError("cov0 must be a semi-positive definite matrix.")

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: dict[str, BaseDistribution],
    ) -> dict[str, Any]:
        """Reorder ``search_space`` to follow ``param_names`` and delegate to the parent.

        An empty ``search_space`` is passed to the parent class untouched.

        Raises:
            ValueError:
                If ``search_space`` is not empty and its keys differ from
                ``param_names``.
        """
        if search_space:
            if set(search_space) != set(self._param_names):
                # Duplicated names are already rejected in ``__init__``, so a mismatch means
                # that param_names and the given search space name different parameters.
                raise ValueError(
                    "The keys in search_space and param_names did not match. "
                    "Please make sure that param_names contains exactly the parameters in "
                    f"search_space. Got search_space keys {sorted(search_space)} and "
                    f"param_names {self._param_names}."
                )
            # Ensure the parameter order is identical to that in param_names, which is the
            # order of the dimensions in mu0 and cov0.
            search_space = {name: search_space[name] for name in self._param_names}

        return super().sample_relative(study=study, trial=trial, search_space=search_space)

    def _calculate_initial_params(
        self, trans: _SearchSpaceTransform
    ) -> tuple[np.ndarray, float, np.ndarray]:
        """Convert the user prior into the initial mean, step size, and covariance.

        The mean and covariance are rescaled by the width of each dimension in
        ``trans._raw_bounds``. The covariance is then split into a scalar step
        size ``sigma0`` and a matrix with unit determinant.

        Returns:
            A tuple ``(mu0, sigma0, cov0)`` in the rescaled coordinates.

        Raises:
            ValueError:
                If the search space includes a distribution other than
                ``IntDistribution`` and ``FloatDistribution``, or a log-scale
                distribution.
        """
        # NOTE(nabenabe): Except for this method, everything is basically based on Optuna
        # v4.0.0. As this class does not support some cases supported by Optuna, I simply added
        # validation to each method, but otherwise, nothing changed. In principle, if users find
        # a bug, it is likely that the bug exists in this method.
        distributions = list(trans._search_space.values())
        if not all(isinstance(d, (IntDistribution, FloatDistribution)) for d in distributions):
            raise ValueError("search_space cannot include categorical parameters.")
        if any(
            d.log for d in distributions if isinstance(d, (FloatDistribution, IntDistribution))
        ):
            src_url = "https://hub.optuna.org/samplers/user_prior_cmaes/"
            raise ValueError(
                "search_space for user_prior cannot include log scale. "
                f"Please use the workaround described in {src_url}."
            )

        dim = len(self._param_names)
        raw_bounds = trans._raw_bounds
        lower_bounds = raw_bounds[:, 0]
        domain_sizes = raw_bounds[:, 1] - lower_bounds
        is_single = domain_sizes == 0.0
        # A zero-width dimension cannot be rescaled by its width. A unit scale is used for it
        # so that neither the mean nor the covariance is divided by zero.
        scales = np.where(is_single, 1.0, domain_sizes)

        # Normalize the mean with respect to the bounds (no clipping is applied). Zero-width
        # dimensions are placed at 0.5.
        mu0 = np.where(is_single, 0.5, (self._mu0 - lower_bounds) / scales)

        # We also need to transform the covariance matrix accordingly to adapt to the
        # normalized scale: element (i, j) is divided by scales[i] * scales[j].
        cov0 = self._cov0 / np.outer(scales, scales)

        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
        # Rounding errors can make the determinant of a singular matrix slightly negative, for
        # which math.pow would raise a "math domain error", so it is clamped at zero.
        det = max(float(np.linalg.det(cov0)), 0.0)
        # The lower bound of sigma0 avoids ZeroDivisionError in cmaes.
        sigma0 = max(math.pow(det, 1.0 / (2.0 * dim)), 1e-10)
        cov0 /= sigma0**2

        return mu0, sigma0, cov0

    def _init_optimizer(
        self,
        trans: _SearchSpaceTransform,
        direction: StudyDirection,
        population_size: int | None = None,
        randomize_start_point: bool = False,
    ) -> CmaClass:
        """Build the ``cmaes`` optimizer initialized with the user prior.

        Args:
            trans: The search space transform providing the bounds.
            direction: Not used in this method.
            population_size: Forwarded to the ``cmaes`` optimizer.
            randomize_start_point:
                Not used in this method; the user-provided mean is always
                the start point. Presumably kept so that the signature
                matches the parent class -- to be confirmed.

        Returns:
            ``cmaes.CMAwM`` if ``with_margin`` is enabled, ``cmaes.CMA`` otherwise.
        """
        n_dimension = len(trans.bounds)
        mu0, sigma0, cov0 = self._calculate_initial_params(trans)
        # Exactly one seed is drawn per optimizer, whichever class is instantiated.
        optimizer_seed = self._cma_rng.rng.randint(1, 2**31 - 2)

        if not self._with_margin:
            return cmaes.CMA(
                mean=mu0,
                sigma=sigma0,
                cov=cov0,
                bounds=trans.bounds,
                seed=optimizer_seed,
                n_max_resampling=10 * n_dimension,
                population_size=population_size,
                lr_adapt=self._lr_adapt,
            )

        steps = np.empty(len(trans._search_space), dtype=float)
        for i, dist in enumerate(trans._search_space.values()):
            assert isinstance(dist, (IntDistribution, FloatDistribution))
            if dist.step is None or dist.log:
                # Set step 0.0 for continuous search space.
                steps[i] = 0.0
            elif dist.low == dist.high:
                steps[i] = 1.0
            else:
                # The step size relative to the width of the domain.
                steps[i] = dist.step / (dist.high - dist.low)

        return cmaes.CMAwM(
            mean=mu0,
            sigma=sigma0,
            bounds=trans.bounds,
            steps=steps,
            cov=cov0,
            seed=optimizer_seed,
            n_max_resampling=10 * n_dimension,
            population_size=population_size,
        )