optuna · nabenabe0928 · Nov 25, 2024 · Oct 31, 2024 · Oct 31, 2024 · Nov 6, 2024
diff --git a/package/samplers/cmamae/LICENSE b/package/samplers/cmamae/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Bryon Tjanaka
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package/samplers/cmamae/README.md b/package/samplers/cmamae/README.md
@@ -0,0 +1,110 @@
+---
+author: Bryon Tjanaka
+title: CMA-MAE Sampler
+description: This sampler performs quality diversity optimization with CMA-MAE as implemented in pyribs.
+tags: [sampler]
+optuna_versions: ['Please fill in the list of versions of Optuna in which you have confirmed the feature works, e.g., 3.6.1.']
+license: MIT License
+---
+
+<!--
+This is an example of the frontmatters.
+All columns must be string.
+You can omit quotes when value types are not ambiguous.
+For tags, a package placed in
+- package/samplers/ must include the tag "sampler"
+- package/visualization/ must include the tag "visualization"
+- package/pruners/ must include the tag "pruner"
+respectively.
+
+---
+author: Optuna team
+title: My Sampler
+description: A description for My Sampler.
+tags: [sampler, 2nd tag for My Sampler, 3rd tag for My Sampler]
+optuna_versions: [3.6.1]
+license: "MIT License"
+---
+-->
+
+Please read the [tutorial guide](https://optuna.github.io/optunahub-registry/recipes/001_first.html) to register your feature in OptunaHub.
+You can find more detailed explanation of the following contents in the tutorial.
+Looking at [other packages' implementations](https://github.com/optuna/optunahub-registry/tree/main/package) will also help you.
+
+## Abstract
+
+You can provide an abstract for your package here.
+This section will help attract potential users to your package.
+
+**Example**
+
+This package provides a sampler based on Gaussian process-based Bayesian optimization. The sampler is highly sample-efficient, so it is suitable for computationally expensive optimization problems with a limited evaluation budget, such as hyperparameter optimization of machine learning algorithms.
+
+## Class or Function Names
+
+Please fill in the class/function names which you implement here.
+
+**Example**
+
+- GPSampler
+
+## Installation
+
+If you have additional dependencies, please fill in the installation guide here.
+If no additional dependencies are required, **this section can be removed**.
+
+**Example**
+
+```shell
+$ pip install scipy torch
+```
+
+If your package has `requirements.txt`, it will be automatically uploaded to the OptunaHub, and the package dependencies will be available to install as follows.
+
+```shell
+$ pip install -r https://hub.optuna.org/{category}/{your_package_name}/requirements.txt
+```
+
+## Example
+
+Please fill in the code snippet to use the implemented feature here.
+
+**Example**
+
+```python
+import optuna
+import optunahub
+
+
+def objective(trial):
+  x = trial.suggest_float("x", -5, 5)
+  return x**2
+
+
+sampler = optunahub.load_module(package="samplers/gp").GPSampler()
+study = optuna.create_study(sampler=sampler)
+study.optimize(objective, n_trials=100)
+```
+
+## Others
+
+Please fill in any other information you have here by adding child sections (###).
+If there is no additional information, **this section can be removed**.
+
+<!--
+For example, you can add sections to introduce a corresponding paper.
+
+### Reference
+Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. 2019.
+Optuna: A Next-generation Hyperparameter Optimization Framework. In KDD.
+
+### Bibtex
+```
+@inproceedings{optuna_2019,
+    title={Optuna: A Next-generation Hyperparameter Optimization Framework},
+    author={Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
+    booktitle={Proceedings of the 25th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
+    year={2019}
+}
+```
+-->
diff --git a/package/samplers/cmamae/__init__.py b/package/samplers/cmamae/__init__.py
@@ -0,0 +1,3 @@
+from .sampler import CmaMaeSampler
+
+__all__ = ["CmaMaeSampler"]
diff --git a/package/samplers/cmamae/example.py b/package/samplers/cmamae/example.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import optuna
+import optunahub
+from optuna.study import StudyDirection
+
+from sampler import CmaMaeSampler
+
+# TODO: Replace above import with this.
+#  module = optunahub.load_module("samplers/cmamae")
+#  CmaMaeSampler = module.CmaMaeSampler
+
+
+def objective(trial: optuna.trial.Trial) -> tuple[float, float, float]:
+    """Evaluate a negated sphere function and report its measures.
+
+    Args:
+        trial: Optuna trial used to suggest ``x`` and ``y`` in ``[-10, 10]``.
+
+    Returns:
+        A 3-tuple ``(objective, x, y)``. The first element,
+        ``-(x**2 + y**2) + 2``, is the objective; the remaining two elements
+        are the measure values.
+    """
+    x = trial.suggest_float("x", -10, 10)
+    y = trial.suggest_float("y", -10, 10)
+    return -(x**2 + y**2) + 2, x, y
+
+
+if __name__ == "__main__":
+    # Initial solution for the emitter, keyed by parameter name.
+    initial_point = {"x": 5, "y": 5}
+
+    sampler = CmaMaeSampler(
+        param_names=["x", "y"],
+        archive_dims=[20, 20],
+        archive_ranges=[(-10, 10), (-10, 10)],
+        archive_learning_rate=0.1,
+        archive_threshold_min=-10,
+        n_emitters=1,
+        emitter_x0=initial_point,
+        emitter_sigma0=0.1,
+        emitter_batch_size=5,
+    )
+
+    # pyribs maximizes objectives, so the first value is maximized.
+    objective_direction = StudyDirection.MAXIMIZE
+    # The remaining values are measures, which do not have an optimization
+    # direction.
+    # TODO: Currently, using StudyDirection.NOT_SET is not allowed as Optuna
+    # assumes we either minimize or maximize.
+    measure_directions = [StudyDirection.MINIMIZE, StudyDirection.MINIMIZE]
+
+    study = optuna.create_study(
+        sampler=sampler,
+        directions=[objective_direction, *measure_directions],
+    )
+    study.optimize(objective, n_trials=100)
+
+    # TODO: Visualization.
+    #  fig = optuna.visualization.plot_optimization_history(study)
+    #  fig.write_image("cmamae_optimization_history.png")
diff --git a/package/samplers/cmamae/requirements.txt b/package/samplers/cmamae/requirements.txt
@@ -0,0 +1,3 @@
+optuna
+optunahub
+ribs
diff --git a/package/samplers/cmamae/sampler.py b/package/samplers/cmamae/sampler.py
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Iterable
+
+import numpy as np
+import optunahub
+from optuna.distributions import BaseDistribution, FloatDistribution
+from optuna.study import Study
+from optuna.trial import FrozenTrial, TrialState
+from ribs.archives import GridArchive
+from ribs.emitters import EvolutionStrategyEmitter
+from ribs.schedulers import Scheduler
+
+# Base class for the sampler below, taken from the "samplers/simple" OptunaHub
+# module. The module is loaded once, when this file is imported.
+SimpleBaseSampler = optunahub.load_module("samplers/simple").SimpleBaseSampler
+
+
+class CmaMaeSampler(SimpleBaseSampler):
+    """A sampler using CMA-MAE as implemented in pyribs.
+
+    `CMA-MAE <https://arxiv.org/abs/2205.10752>`_ is a quality diversity
+    algorithm that has demonstrated state-of-the-art performance in a variety of
+    domains. `pyribs <https://pyribs.org>`_ is a bare-bones Python library for
+    quality diversity optimization algorithms. For a primer on CMA-MAE and
+    pyribs, we recommend referring to the series of `pyribs tutorials
+    <https://docs.pyribs.org/en/stable/tutorials.html>`_.
+
+    For simplicity, this implementation provides a default instantiation of
+    CMA-MAE with a `GridArchive
+    <https://docs.pyribs.org/en/stable/api/ribs.archives.GridArchive.html>`_ and
+    `EvolutionStrategyEmitter
+    <https://docs.pyribs.org/en/stable/api/ribs.emitters.EvolutionStrategyEmitter.html>`_
+    with improvement ranking, all wrapped up in a `Scheduler
+    <https://docs.pyribs.org/en/stable/api/ribs.schedulers.Scheduler.html>`_.
+
+    Every trial must report the objective value first, followed by the measure
+    values. Results are buffered and passed to pyribs once a full batch of
+    ``n_emitters * emitter_batch_size`` trials has finished.
+
+    Args:
+        param_names: List of names of parameters to optimize.
+        archive_dims: Number of archive cells in each dimension of the measure
+            space, e.g. ``[20, 30, 40]`` indicates there should be 3 dimensions
+            with 20, 30, and 40 cells. (The number of dimensions is implicitly
+            defined in the length of this argument).
+        archive_ranges: Lower and upper bound of each dimension of the measure
+            space for the archive, e.g. ``[(-1, 1), (-2, 2)]`` indicates the
+            first dimension should have bounds :math:`[-1,1]` (inclusive), and
+            the second dimension should have bounds :math:`[-2,2]` (inclusive).
+            ``archive_ranges`` should be the same length as ``archive_dims``.
+        archive_learning_rate: The learning rate for threshold updates in the
+            archive.
+        archive_threshold_min: The initial threshold value for all the cells in
+            the archive.
+        n_emitters: Number of emitters to use in CMA-MAE.
+        emitter_x0: Mapping from parameter names to their initial values.
+        emitter_sigma0: Initial step size / standard deviation of the
+            distribution from which solutions are sampled in the emitter.
+        emitter_batch_size: Number of solutions for each emitter to generate on
+            each iteration.
+
+    Raises:
+        ValueError: If ``param_names`` contains duplicates, or if the keys of
+            ``emitter_x0`` are not exactly the names in ``param_names``.
+    """
+
+    def __init__(
+        self,
+        *,
+        param_names: list[str],
+        archive_dims: list[int],
+        archive_ranges: list[tuple[float, float]],
+        archive_learning_rate: float,
+        archive_threshold_min: float,
+        n_emitters: int,
+        emitter_x0: dict[str, float],
+        emitter_sigma0: float,
+        emitter_batch_size: int,
+    ) -> None:
+        self._validate_params(param_names, emitter_x0)
+        # Keep a private copy: the order of this list defines the layout of
+        # every solution vector exchanged with pyribs.
+        self._param_names = param_names[:]
+
+        # NOTE: SimpleBaseSampler must know Optuna search_space information.
+        search_space = {
+            name: FloatDistribution(-1e9, 1e9) for name in self._param_names
+        }
+        super().__init__(search_space=search_space)
+
+        emitter_x0_np = self._convert_to_pyribs_params(emitter_x0)
+
+        # Archive the emitters optimize against (with learning rate and
+        # threshold), plus a separate result archive built without them.
+        archive = GridArchive(
+            solution_dim=len(param_names),
+            dims=archive_dims,
+            ranges=archive_ranges,
+            learning_rate=archive_learning_rate,
+            threshold_min=archive_threshold_min,
+        )
+        result_archive = GridArchive(
+            solution_dim=len(param_names),
+            dims=archive_dims,
+            ranges=archive_ranges,
+        )
+        emitters = [
+            EvolutionStrategyEmitter(
+                archive,
+                x0=emitter_x0_np,
+                sigma0=emitter_sigma0,
+                ranker="imp",
+                selection_rule="mu",
+                restart_rule="basic",
+                batch_size=emitter_batch_size,
+            )
+            for _ in range(n_emitters)
+        ]
+
+        # Number of solutions generated in each batch from pyribs.
+        self._batch_size = n_emitters * emitter_batch_size
+
+        self._scheduler = Scheduler(
+            archive,
+            emitters,
+            result_archive=result_archive,
+        )
+
+        # Results of finished trials waiting to be told to the scheduler.
+        self._values_to_tell: list[list[float]] = []
+
+    def _validate_params(
+        self, param_names: list[str], emitter_x0: dict[str, float]
+    ) -> None:
+        """Check that the names are unique and that ``emitter_x0`` covers them.
+
+        Raises:
+            ValueError: If ``param_names`` has duplicates or the keys of
+                ``emitter_x0`` differ from ``param_names``.
+        """
+        param_set = set(param_names)
+        if len(param_names) != len(param_set):
+            raise ValueError(
+                "Some elements in param_names are duplicated. Please make it a unique list."
+            )
+
+        if param_set != emitter_x0.keys():
+            raise ValueError(
+                "emitter_x0 does not contain the parameters listed in param_names. "
+                "Please provide an initial value for each parameter."
+            )
+
+    def _validate_param_names(self, given_param_names: Iterable[str]) -> None:
+        """Raise ``ValueError`` unless the given names equal the sampler's names."""
+        if set(self._param_names) != set(given_param_names):
+            raise ValueError(
+                "The given param names must match the param names "
+                "initially passed to this sampler."
+            )
+
+    def _convert_to_pyribs_params(self, params: dict[str, float]) -> np.ndarray:
+        """Pack a name-to-value mapping into a vector ordered like ``param_names``."""
+        return np.array([params[name] for name in self._param_names], dtype=float)
+
+    def _convert_to_optuna_params(self, params: np.ndarray) -> dict[str, float]:
+        """Unpack a solution vector into a name-to-value mapping.
+
+        Values are converted to built-in ``float`` so the result matches the
+        annotated return type instead of carrying numpy scalars.
+        """
+        return {
+            name: float(params[i]) for i, name in enumerate(self._param_names)
+        }
+
+    def sample_relative(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        search_space: dict[str, BaseDistribution],
+    ) -> dict[str, float]:
+        """Ask the scheduler for a batch and hand out its first solution.
+
+        The first solution of the batch becomes the parameters of ``trial``;
+        every other solution is enqueued on ``study``.
+
+        Raises:
+            ValueError: If the names in ``search_space`` differ from the names
+                passed to the constructor.
+        """
+        self._validate_param_names(search_space.keys())
+
+        # Note: Batch optimization means we need to enqueue trials.
+        solutions = self._scheduler.ask()
+        for solution in solutions[1:]:
+            study.enqueue_trial(self._convert_to_optuna_params(solution))
+
+        return self._convert_to_optuna_params(solutions[0])
+
+    def after_trial(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        state: TrialState,
+        values: Sequence[float] | None,
+    ) -> None:
+        """Buffer the trial result and tell the scheduler once a batch is full.
+
+        Raises:
+            ValueError: If the trial finished without values, or if the names
+                in ``trial.params`` differ from the sampler's parameter names.
+        """
+        # TODO: Is it safe to assume the parameters will always come back in the
+        # order that they were sent out by the scheduler? Pyribs makes that
+        # assumption and stores the solutions internally. If not, maybe we can
+        # retrieve solutions based on their trial ID?
+
+        # A trial without values cannot be reported to the scheduler. Fail here
+        # with a clear message instead of storing None and hitting an obscure
+        # numpy error when the batch is assembled.
+        if values is None:
+            raise ValueError(
+                "CmaMaeSampler requires every trial to report an objective and "
+                f"its measures, but trial {trial.number} finished with state "
+                f"{state.name} and no values."
+            )
+
+        self._validate_param_names(trial.params.keys())
+
+        # Store the trial result.
+        self._values_to_tell.append([float(v) for v in values])
+
+        # If we have not retrieved the whole batch of solutions, then we should
+        # not tell() the results to the scheduler yet.
+        if len(self._values_to_tell) != self._batch_size:
+            return
+
+        # Tell the batch results to external sampler once the batch is ready.
+        # Column 0 is the objective; the remaining columns are the measures.
+        batch = np.asarray(self._values_to_tell)
+        self._scheduler.tell(objective=batch[:, 0], measures=batch[:, 1:])
+
+        # Empty the results.
+        self._values_to_tell = []
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .sampler import CmaMaeSampler

		__all__ = ["CmaMaeSampler"]