diff --git a/package/samplers/user_prior_cmaes/LICENSE b/package/samplers/user_prior_cmaes/LICENSE new file mode 100644 index 00000000..f6547d35 --- /dev/null +++ b/package/samplers/user_prior_cmaes/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Shuhei Watanabe + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md new file mode 100644 index 00000000..3019d902 --- /dev/null +++ b/package/samplers/user_prior_cmaes/README.md @@ -0,0 +1,92 @@ +--- +author: Shuhei Watanabe +title: CMA-ES with User Prior +description: You can provide the initial parameters, i.e. mean vector and covariance matrix, for CMA-ES with this sampler. +tags: [sampler, cma-es, meta-learning, prior] +optuna_versions: [4.0.0] +license: MIT License +--- + +## Abstract + +As the Optuna CMA-ES sampler does not support any flexible ways to initialize the parameters of the Gaussian distribution, so I created a workaround to do so. + +## Class or Function Names + +- UserPriorCmaEsSampler + +In principle, most arguments follow [`optuna.samplers.CmaEsSampler`](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html), but some parts are modified. + +For example, `UserPriorCmaEsSampler` does not support `source_trials` and `use_separable_cma` due to their incompatibility. +Instead, we replaced `x0` and `sigma0` in `CmaEsSampler` with `mu0` and `cov0`. +In `CmaEsSampler`, we needed to provide `x0` as `dict` and `sigma0` only as `float`. +By adding `param_names` to the requirement, we can now give `mu0` (previously `x0`) and `cov0` (previously `sigma0`) as `np.ndarray`. +Note that the order of each dimension in `mu0` and `cov0` must be consistent with that in `param_names`. + +## Installation + +```shell +$ pip install optunahub cmaes +``` + +## Example + +The simplest code example is as follows: + +```python +import numpy as np +import optuna +import optunahub + + +def objective(trial: optuna.Trial) -> float: + x = trial.suggest_float("x", -50, -40) + y = trial.suggest_int("y", -5, 5) + return (x + 43)**2 + (y - 2)**2 + + +if __name__ == "__main__": + module = optunahub.load_module(package="samplers/user_prior_cmaes") + # ``with_margin=True`` because the search space has an integer parameter. + sampler = module.UserPriorCmaEsSampler( + param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]), with_margin=True + ) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=20) + print(study.best_trial.value, study.best_trial.params) + +``` + +Although `UserPriorCmaEsSampler` CANNOT support log scale from the sampler side, we have a workaround to do so: + +```python +import math + +import numpy as np +import optuna +import optunahub + + +def objective(trial: optuna.Trial) -> float: + # For example, trial.suggest_float("x", 1e-5, 1.0, log=True) can be encoded as: + x = 10 ** trial.suggest_float("log10_x", -5, 0) + # trial.suggest_float("y", 2, 1024, log=True) can be encoded as: + y = 2 ** trial.suggest_float("log2_y", 1, 10) + # In general, trial.suggest_float("z", low, high, log=True) can be encoded as: + low, high = 3, 81 + b = 3 # The base of log can be any positive number. + z = b ** trial.suggest_float("logb_z", math.log(low, b), math.log(high, b)) + return x**2 + y**2 + z**2 + + +if __name__ == "__main__": + module = optunahub.load_module(package="samplers/user_prior_cmaes") + sampler = module.UserPriorCmaEsSampler( + param_names=["log10_x", "log2_y", "logb_z"], + mu0=np.array([-4, 8, 3]), + cov0=np.diag([0.2, 1., 0.1]), + ) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=20) + print(study.best_trial.value, study.best_trial.params) +``` diff --git a/package/samplers/user_prior_cmaes/__init__.py b/package/samplers/user_prior_cmaes/__init__.py new file mode 100644 index 00000000..ed003499 --- /dev/null +++ b/package/samplers/user_prior_cmaes/__init__.py @@ -0,0 +1,4 @@ +from .sampler import UserPriorCmaEsSampler + + +__all__ = ["UserPriorCmaEsSampler"] diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py new file mode 100644 index 00000000..bf6fe28e --- /dev/null +++ b/package/samplers/user_prior_cmaes/sampler.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import math +from typing import Any +from typing import Union + +import cmaes +import numpy as np +from optuna import Study +from optuna._transform import _SearchSpaceTransform +from optuna.distributions import BaseDistribution +from optuna.distributions import FloatDistribution +from optuna.distributions import IntDistribution +from optuna.samplers import BaseSampler +from optuna.samplers import CmaEsSampler +from optuna.study import StudyDirection +from optuna.trial import FrozenTrial + + +CmaClass = Union[cmaes.CMA, cmaes.SepCMA, cmaes.CMAwM] + + +class UserPriorCmaEsSampler(CmaEsSampler): + """A sampler using `cmaes `__ as the backend with user prior. + + Please check ``CmaEsSampler`` in Optuna for more details of each argument. + This class modified the arguments ``x0`` and ``sigma0`` in ``CmaEsSampler`` of Optuna. + Furthermore, due to the incompatibility, + This class does not support ``source_trials`` and ``use_separable_cma``. + + Args: + param_names: + The list of the parameter names to be tuned. This list must be a unique list. + mu0: + The mean vector used for the initialization of CMA-ES. + cov0: + The covariance matrix used for the initialization of CMA-ES. + """ # NOQA: E501 + + def __init__( + self, + param_names: list[str], + mu0: np.ndarray, + cov0: np.ndarray, + n_startup_trials: int = 1, + independent_sampler: BaseSampler | None = None, + warn_independent_sampling: bool = True, + seed: int | None = None, + *, + consider_pruned_trials: bool = False, + restart_strategy: str | None = None, + popsize: int | None = None, + inc_popsize: int = 2, + with_margin: bool = False, + lr_adapt: bool = False, + ) -> None: + super().__init__( + x0=None, + sigma0=None, + n_startup_trials=n_startup_trials, + independent_sampler=independent_sampler, + warn_independent_sampling=warn_independent_sampling, + seed=seed, + consider_pruned_trials=consider_pruned_trials, + restart_strategy=restart_strategy, + popsize=popsize, + inc_popsize=inc_popsize, + use_separable_cma=False, + with_margin=with_margin, + lr_adapt=lr_adapt, + source_trials=None, + ) + self._validate_user_prior(param_names, mu0, cov0) + self._param_names = param_names[:] + self._mu0 = mu0.astype(float) + self._cov0 = cov0.astype(float) + + def _validate_user_prior( + self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray + ) -> None: + dim = len(param_names) + if dim != len(set(param_names)): + raise ValueError( + "Some elements in param_names are duplicated. Please make it a unique list." + ) + if mu0.shape != (dim,) or cov0.shape != (dim, dim): + raise ValueError( + f"The shape of mu0 and cov0 must be (len(param_names)={dim}, ) and " + f"(len(param_names)={dim}, len(param_names)={dim}), but got {mu0.shape} and " + f"{cov0.shape}." + ) + if not np.allclose(cov0, cov0.T): + raise ValueError("cov0 must be a symmetric matrix.") + if np.any(cov0 < 0.0): + raise ValueError("All elements in cov0 must be non-negative.") + if np.any(np.linalg.eigvals(cov0) < 0.0): + raise ValueError("cov0 must be a semi-positive definite matrix.") + + def sample_relative( + self, + study: Study, + trial: FrozenTrial, + search_space: dict[str, BaseDistribution], + ) -> dict[str, Any]: + if len(search_space) != 0 and set(search_space.keys()) != set(self._param_names): + raise ValueError( + "The keys in search_space and param_names did not match. " + "The most probable reason is duplicated names in param_names." + ) + elif len(search_space) != 0: + # Ensure the parameter order is identical to that in param_names. + search_space = { + param_name: search_space[param_name] for param_name in self._param_names + } + + return super().sample_relative(study=study, trial=trial, search_space=search_space) + + def _calculate_initial_params( + self, trans: _SearchSpaceTransform + ) -> tuple[np.ndarray, float, np.ndarray]: + # NOTE(nabenabe): Except this method, everything is basically based on Optuna v4.0.0. + # As this class does not support some cases supported by Optuna, I simply added validation + # to each method, but otherwise, nothing changed. In principle, if users find a bug, it is + # likely that the bug exists in this method. + search_space = trans._search_space.copy() + if any( + not isinstance(d, (IntDistribution, FloatDistribution)) for d in search_space.values() + ): + raise ValueError("search_space cannot include categorical parameters.") + if any( + d.log + for d in search_space.values() + if isinstance(d, (FloatDistribution, IntDistribution)) + ): + src_url = "https://hub.optuna.org/samplers/user_prior_cmaes/" + raise ValueError( + "search_space for user_prior cannot include log scale. " + f"Please use the workaround described in {src_url}." + ) + + dim = len(self._param_names) + raw_bounds = trans._raw_bounds + domain_sizes = raw_bounds[:, 1] - raw_bounds[:, 0] + is_single = domain_sizes == 0.0 + + mu0 = self._mu0.copy() + mu0[is_single] = 0.5 + # Clip into [0, 1]. + mu0[~is_single] = (mu0[~is_single] - raw_bounds[~is_single, 0]) / domain_sizes[~is_single] + + # We also need to transform the covariance matrix accordingly to adapt to the [0, 1] scale. + cov0 = self._cov0 / (domain_sizes * domain_sizes[:, np.newaxis]) + + # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention. + sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim) + # Avoid ZeroDivisionError in cmaes. + sigma0 = max(sigma0, 1e-10) + cov0 /= sigma0**2 + + return mu0, sigma0, cov0 + + def _init_optimizer( + self, + trans: _SearchSpaceTransform, + direction: StudyDirection, + population_size: int | None = None, + randomize_start_point: bool = False, + ) -> CmaClass: + n_dimension = len(trans.bounds) + mu0, sigma0, cov0 = self._calculate_initial_params(trans) + + if self._with_margin: + steps = np.empty(len(trans._search_space), dtype=float) + for i, dist in enumerate(trans._search_space.values()): + assert isinstance(dist, (IntDistribution, FloatDistribution)) + # Set step 0.0 for continuous search space. + if dist.step is None or dist.log: + steps[i] = 0.0 + elif dist.low == dist.high: + steps[i] = 1.0 + else: + steps[i] = dist.step / (dist.high - dist.low) + + return cmaes.CMAwM( + mean=mu0, + sigma=sigma0, + bounds=trans.bounds, + steps=steps, + cov=cov0, + seed=self._cma_rng.rng.randint(1, 2**31 - 2), + n_max_resampling=10 * n_dimension, + population_size=population_size, + ) + + return cmaes.CMA( + mean=mu0, + sigma=sigma0, + cov=cov0, + bounds=trans.bounds, + seed=self._cma_rng.rng.randint(1, 2**31 - 2), + n_max_resampling=10 * n_dimension, + population_size=population_size, + lr_adapt=self._lr_adapt, + )