From a1c28660d6478ddf148b042e1ea59d0040cbaf67 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 09:53:25 +0200
Subject: [PATCH 01/14] Add CMA-ES with user prior

---
 package/samplers/user_prior_cmaes/LICENSE     |  21 +++
 package/samplers/user_prior_cmaes/README.md   | 110 ++++++++++++
 package/samplers/user_prior_cmaes/__init__.py |   4 +
 package/samplers/user_prior_cmaes/sampler.py  | 164 ++++++++++++++++++
 4 files changed, 299 insertions(+)
 create mode 100644 package/samplers/user_prior_cmaes/LICENSE
 create mode 100644 package/samplers/user_prior_cmaes/README.md
 create mode 100644 package/samplers/user_prior_cmaes/__init__.py
 create mode 100644 package/samplers/user_prior_cmaes/sampler.py

diff --git a/package/samplers/user_prior_cmaes/LICENSE b/package/samplers/user_prior_cmaes/LICENSE
new file mode 100644
index 00000000..f6547d35
--- /dev/null
+++ b/package/samplers/user_prior_cmaes/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Shuhei Watanabe
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
new file mode 100644
index 00000000..d4e40702
--- /dev/null
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -0,0 +1,110 @@
+---
+author: Please fill in the author name here. (e.g., John Smith)
+title: Please fill in the title of the feature here. (e.g., Gaussian-Process Expected Improvement Sampler)
+description: Please fill in the description of the feature here. (e.g., This sampler searches for each trial based on expected improvement using Gaussian process.)
+tags: [Please fill in the list of tags here. (e.g., sampler, visualization, pruner)]
+optuna_versions: ['Please fill in the list of versions of Optuna in which you have confirmed the feature works, e.g., 3.6.1.']
+license: MIT License
+---
+
+<!--
+This is an example of the frontmatters.
+All columns must be string.
+You can omit quotes when value types are not ambiguous.
+For tags, a package placed in
+- package/samplers/ must include the tag "sampler"
+- package/visualilzation/ must include the tag "visualization"
+- package/pruners/ must include the tag "pruner"
+respectively.
+
+---
+author: Optuna team
+title: My Sampler
+description: A description for My Sampler.
+tags: [sampler, 2nd tag for My Sampler, 3rd tag for My Sampler]
+optuna_versions: [3.6.1]
+license: "MIT License"
+---
+-->
+
+Please read the [tutorial guide](https://optuna.github.io/optunahub-registry/recipes/001_first.html) to register your feature in OptunaHub.
+You can find more detailed explanation of the following contents in the tutorial.
+Looking at [other packages' implementations](https://github.com/optuna/optunahub-registry/tree/main/package) will also help you.
+
+## Abstract
+
+You can provide an abstract for your package here.
+This section will help attract potential users to your package.
+
+**Example**
+
+This package provides a sampler based on Gaussian process-based Bayesian optimization. The sampler is highly sample-efficient, so it is suitable for computationally expensive optimization problems with a limited evaluation budget, such as hyperparameter optimization of machine learning algorithms.
+
+## Class or Function Names
+
+Please fill in the class/function names which you implement here.
+
+**Example**
+
+- GPSampler
+
+## Installation
+
+If you have additional dependencies, please fill in the installation guide here.
+If no additional dependencies is required, **this section can be removed**.
+
+**Example**
+
+```shell
+$ pip install scipy torch
+```
+
+If your package has `requirements.txt`, it will be automatically uploaded to the OptunaHub, and the package dependencies will be available to install as follows.
+
+```shell
+ pip install -r https://hub.optuna.org/{category}/{your_package_name}/requirements.txt
+```
+
+## Example
+
+Please fill in the code snippet to use the implemented feature here.
+
+**Example**
+
+```python
+import optuna
+import optunahub
+
+
+def objective(trial):
+  x = trial.suggest_float("x", -5, 5)
+  return x**2
+
+
+sampler = optunahub.load_module(package="samplers/gp").GPSampler()
+study = optuna.create_study(sampler=sampler)
+study.optimize(objective, n_trials=100)
+```
+
+## Others
+
+Please fill in any other information if you have here by adding child sections (###).
+If there is no additional information, **this section can be removed**.
+
+<!--
+For example, you can add sections to introduce a corresponding paper.
+
+### Reference
+Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. 2019.
+Optuna: A Next-generation Hyperparameter Optimization Framework. In KDD.
+
+### Bibtex
+```
+@inproceedings{optuna_2019,
+    title={Optuna: A Next-generation Hyperparameter Optimization Framework},
+    author={Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
+    booktitle={Proceedings of the 25th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
+    year={2019}
+}
+```
+-->
diff --git a/package/samplers/user_prior_cmaes/__init__.py b/package/samplers/user_prior_cmaes/__init__.py
new file mode 100644
index 00000000..ed003499
--- /dev/null
+++ b/package/samplers/user_prior_cmaes/__init__.py
@@ -0,0 +1,4 @@
+from .sampler import UserPriorCmaEsSampler
+
+
+__all__ = ["UserPriorCmaEsSampler"]
diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
new file mode 100644
index 00000000..9b84981b
--- /dev/null
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -0,0 +1,164 @@
+from __future__ import annotations
+
+import math
+from typing import Any
+
+import cmaes
+import numpy as np
+from optuna import Study
+from optuna._transform import _SearchSpaceTransform
+from optuna.distributions import BaseDistribution
+from optuna.distributions import FloatDistribution
+from optuna.distributions import IntDistribution
+from optuna.samplers import BaseSampler
+from optuna.samplers import CmaClass
+from optuna.samplers import CmaEsSampler
+from optuna.study import StudyDirection
+from optuna.trial import FrozenTrial
+
+
+class UserPriorCmaEsSampler(CmaEsSampler):
+    """A sampler using `cmaes <https://github.com/CyberAgentAILab/cmaes>`__ as the backend with user prior.
+
+    Please check ``CmaEsSampler`` in Optuna for more details of each argument.
+    This class replaces the arguments ``x0`` and ``sigma0`` of Optuna's ``CmaEsSampler`` with ``mu0`` and ``cov0``.
+    Note the following limitation compared with ``CmaEsSampler``.
+    This class does not support ``source_trials`` and ``use_separable_cma`` due to their incompatibility.
+
+    Args:
+        x0:
+            A dictionary of an initial parameter values for CMA-ES. By default, the mean of ``low``
+            and ``high`` for each distribution is used. Note that ``x0`` is sampled uniformly
+            within the search space domain for each restart if you specify ``restart_strategy``
+            argument.
+
+        sigma0:
+            Initial standard deviation of CMA-ES. By default, ``sigma0`` is set to
+            ``min_range / 6``, where ``min_range`` denotes the minimum range of the distributions
+            in the search space.
+    """  # NOQA: E501
+
+    def __init__(
+        self,
+        param_names: list[str],
+        mu0: np.ndarray,
+        cov0: np.ndarray,
+        n_startup_trials: int = 1,
+        independent_sampler: BaseSampler | None = None,
+        warn_independent_sampling: bool = True,
+        seed: int | None = None,
+        *,
+        consider_pruned_trials: bool = False,
+        restart_strategy: str | None = None,
+        popsize: int | None = None,
+        inc_popsize: int = 2,
+        with_margin: bool = False,
+        lr_adapt: bool = False,
+    ) -> None:
+        super().__init__(
+            x0=None,
+            sigma0=None,
+            n_startup_trials=n_startup_trials,
+            independent_sampler=independent_sampler,
+            warn_independent_sampling=warn_independent_sampling,
+            seed=seed,
+            consider_pruned_trials=consider_pruned_trials,
+            restart_strategy=restart_strategy,
+            popsize=popsize,
+            inc_popsize=inc_popsize,
+            use_separable_cma=False,
+            with_margin=with_margin,
+            lr_adapt=lr_adapt,
+            source_trials=None,
+        )
+        self._validate_user_prior(param_names, mu0, cov0)
+        dim = len(param_names)
+        self._param_names = param_names[:]
+        self._mu0 = mu0.copy()
+        self._sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim)
+        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
+        self._cov0 = cov0.copy() / self._sigma0**2
+
+    def _validate_user_prior(
+        self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray
+    ) -> None:
+        dim = len(param_names)
+        if dim != len(set(param_names)):
+            raise ValueError(
+                "Some elements in param_names are duplicated. Please make it a unique list."
+            )
+        if mu0.shape != (dim,) or cov0.shape != (dim, dim):
+            raise ValueError(
+                f"The shape of mu0 and cov0 must be (len(param_names)={dim}, ) and "
+                f"(len(param_names)={dim}, len(param_names)={dim}), but got {mu0.shape} and "
+                f"{cov0.shape}."
+            )
+        if not np.allclose(cov0, cov0.T):
+            raise ValueError("cov0 must be a symmetric matrix.")
+        if np.any(cov0 < 0.0):
+            raise ValueError("All elements in cov0 must be non-negative.")
+        if np.any(np.linalg.eigvals(cov0) < 0.0):
+            raise ValueError("cov0 must be a semi-positive definite matrix.")
+
+    def sample_relative(
+        self,
+        study: Study,
+        trial: FrozenTrial,
+        search_space: dict[str, BaseDistribution],
+    ) -> dict[str, Any]:
+        if len(search_space) != 0 and set(search_space.keys()) != set(self._param_names):
+            raise
+        elif len(search_space) != 0:
+            search_space = {
+                param_name: search_space[param_name] for param_name in self._param_names
+            }
+
+        return super().sample_relative(study=study, trial=trial, search_space=search_space)
+
+    def _init_optimizer(
+        self,
+        trans: _SearchSpaceTransform,
+        direction: StudyDirection,
+        population_size: int | None = None,
+        randomize_start_point: bool = False,
+    ) -> CmaClass:
+        n_dimension = len(trans.bounds)
+        mean = self._mu0.copy()
+        cov = self._cov0.copy()
+
+        # Avoid ZeroDivisionError in cmaes.
+        sigma0 = max(self._sigma0, 1e-10)
+
+        if self._with_margin:
+            steps = np.empty(len(trans._search_space), dtype=float)
+            for i, dist in enumerate(trans._search_space.values()):
+                assert isinstance(dist, (IntDistribution, FloatDistribution))
+                # Set step 0.0 for continuous search space.
+                if dist.step is None or dist.log:
+                    steps[i] = 0.0
+                elif dist.low == dist.high:
+                    steps[i] = 1.0
+                else:
+                    steps[i] = dist.step / (dist.high - dist.low)
+
+            return cmaes.CMAwM(
+                mean=mean,
+                sigma=sigma0,
+                bounds=trans.bounds,
+                steps=steps,
+                cov=cov,
+                seed=self._cma_rng.rng.randint(1, 2**31 - 2),
+                n_max_resampling=10 * n_dimension,
+                population_size=population_size,
+            )
+
+        return cmaes.CMA(
+            mean=mean,
+            sigma=sigma0,
+            cov=cov,
+            bounds=trans.bounds,
+            seed=self._cma_rng.rng.randint(1, 2**31 - 2),
+            n_max_resampling=10 * n_dimension,
+            population_size=population_size,
+            lr_adapt=self._lr_adapt,
+        )

From bef025ef807a0247f92db268ce6021cccb5137e5 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 10:01:15 +0200
Subject: [PATCH 02/14] Fix import error

---
 package/samplers/user_prior_cmaes/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 9b84981b..0fc82f28 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -11,8 +11,8 @@
 from optuna.distributions import FloatDistribution
 from optuna.distributions import IntDistribution
 from optuna.samplers import BaseSampler
-from optuna.samplers import CmaClass
 from optuna.samplers import CmaEsSampler
+from optuna.samplers._cmaes import CmaClass
 from optuna.study import StudyDirection
 from optuna.trial import FrozenTrial
 

From ca53af34852db505cdbf1dff34dadd5b3bf16420 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 10:02:48 +0200
Subject: [PATCH 03/14] Fix import error

---
 package/samplers/user_prior_cmaes/sampler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 0fc82f28..9c601cc1 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -2,6 +2,7 @@
 
 import math
 from typing import Any
+from typing import Union
 
 import cmaes
 import numpy as np
@@ -12,11 +13,13 @@
 from optuna.distributions import IntDistribution
 from optuna.samplers import BaseSampler
 from optuna.samplers import CmaEsSampler
-from optuna.samplers._cmaes import CmaClass
 from optuna.study import StudyDirection
 from optuna.trial import FrozenTrial
 
 
+CmaClass = Union[cmaes.CMA, cmaes.SepCMA, cmaes.CMAwM]
+
+
 class UserPriorCmaEsSampler(CmaEsSampler):
     """A sampler using `cmaes <https://github.com/CyberAgentAILab/cmaes>`__ as the backend with user prior.
 

From 1b644152415d2b46eb763079d0bb77ab648288a0 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:08:09 +0200
Subject: [PATCH 04/14] Correct the scale of each param

---
 package/samplers/user_prior_cmaes/sampler.py | 73 ++++++++++++++------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 9c601cc1..8201f2b0 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -29,16 +29,12 @@ class UserPriorCmaEsSampler(CmaEsSampler):
     This class does not support ``source_trials`` and ``use_separable_cma`` due to their incompatibility.
 
     Args:
-        x0:
-            A dictionary of an initial parameter values for CMA-ES. By default, the mean of ``low``
-            and ``high`` for each distribution is used. Note that ``x0`` is sampled uniformly
-            within the search space domain for each restart if you specify ``restart_strategy``
-            argument.
-
-        sigma0:
-            Initial standard deviation of CMA-ES. By default, ``sigma0`` is set to
-            ``min_range / 6``, where ``min_range`` denotes the minimum range of the distributions
-            in the search space.
+        param_names:
+            The list of the parameter names to be tuned. This list must be a unique list.
+        mu0:
+            The mean vector used for the initialization of CMA-ES.
+        cov0:
+            The covariance matrix used for the initialization of CMA-ES.
     """  # NOQA: E501
 
     def __init__(
@@ -75,12 +71,9 @@ def __init__(
             source_trials=None,
         )
         self._validate_user_prior(param_names, mu0, cov0)
-        dim = len(param_names)
         self._param_names = param_names[:]
         self._mu0 = mu0.copy()
-        self._sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim)
-        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
-        self._cov0 = cov0.copy() / self._sigma0**2
+        self._cov0 = cov0.copy()
 
     def _validate_user_prior(
         self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray
@@ -118,6 +111,44 @@ def sample_relative(
 
         return super().sample_relative(study=study, trial=trial, search_space=search_space)
 
+    def _calculate_initial_params(
+        self, trans: _SearchSpaceTransform
+    ) -> tuple[np.ndarray, float, np.ndarray]:
+        search_space = trans._search_space.copy()
+        if any(
+            not isinstance(d, (IntDistribution, FloatDistribution)) for d in search_space.values()
+        ):
+            raise ValueError("search_space cannot include categorical parameters.")
+        if any(
+            d.log
+            for d in search_space.values()
+            if isinstance(d, (FloatDistribution, IntDistribution))
+        ):
+            src_url = "https://hub.optuna.org/samplers/user_prior_cmaes/"
+            raise ValueError(
+                "search_space for user_prior cannot include log scale. "
+                f"Please use the workaround described in {src_url}."
+            )
+
+        dim = len(self._param_names)
+        raw_bounds = trans._raw_bounds
+        domain_sizes = raw_bounds[:, 1] - raw_bounds[:, 0]
+        is_single = domain_sizes == 0.0
+
+        mu0 = self._mu0.copy()
+        mu0[is_single] = 0.5
+        mu0[~is_single] = (mu0[~is_single] - raw_bounds[~is_single, 0]) / domain_sizes[~is_single]
+
+        cov0 = self._cov0 / (domain_sizes * domain_sizes[:, np.newaxis])
+        sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim)
+        # Avoid ZeroDivisionError in cmaes.
+        sigma0 = max(self._sigma0, 1e-10)
+
+        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
+        cov0 /= sigma0**2
+
+        return mu0, sigma0, cov0
+
     def _init_optimizer(
         self,
         trans: _SearchSpaceTransform,
@@ -126,11 +157,7 @@ def _init_optimizer(
         randomize_start_point: bool = False,
     ) -> CmaClass:
         n_dimension = len(trans.bounds)
-        mean = self._mu0.copy()
-        cov = self._cov0.copy()
-
-        # Avoid ZeroDivisionError in cmaes.
-        sigma0 = max(self._sigma0, 1e-10)
+        mu0, sigma0, cov0 = self._calculate_initial_params(trans._search_space)
 
         if self._with_margin:
             steps = np.empty(len(trans._search_space), dtype=float)
@@ -145,20 +172,20 @@ def _init_optimizer(
                     steps[i] = dist.step / (dist.high - dist.low)
 
             return cmaes.CMAwM(
-                mean=mean,
+                mean=mu0,
                 sigma=sigma0,
                 bounds=trans.bounds,
                 steps=steps,
-                cov=cov,
+                cov=cov0,
                 seed=self._cma_rng.rng.randint(1, 2**31 - 2),
                 n_max_resampling=10 * n_dimension,
                 population_size=population_size,
             )
 
         return cmaes.CMA(
-            mean=mean,
+            mean=mu0,
             sigma=sigma0,
-            cov=cov,
+            cov=cov0,
             bounds=trans.bounds,
             seed=self._cma_rng.rng.randint(1, 2**31 - 2),
             n_max_resampling=10 * n_dimension,

From 876c9397753f97c4bfcf7d1cbd7747fb17a75638 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:09:30 +0200
Subject: [PATCH 05/14] Fix

---
 package/samplers/user_prior_cmaes/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 8201f2b0..32ebba9d 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -157,7 +157,7 @@ def _init_optimizer(
         randomize_start_point: bool = False,
     ) -> CmaClass:
         n_dimension = len(trans.bounds)
-        mu0, sigma0, cov0 = self._calculate_initial_params(trans._search_space)
+        mu0, sigma0, cov0 = self._calculate_initial_params(trans)
 
         if self._with_margin:
             steps = np.empty(len(trans._search_space), dtype=float)

From 40e94888074532ed3aee09924fd6fc1d2f9c3a7e Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:11:11 +0200
Subject: [PATCH 06/14] Fix

---
 package/samplers/user_prior_cmaes/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 32ebba9d..7995ed45 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -142,7 +142,7 @@ def _calculate_initial_params(
         cov0 = self._cov0 / (domain_sizes * domain_sizes[:, np.newaxis])
         sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim)
         # Avoid ZeroDivisionError in cmaes.
-        sigma0 = max(self._sigma0, 1e-10)
+        sigma0 = max(sigma0, 1e-10)
 
         # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
         cov0 /= sigma0**2

From d845e1054d82b1dfe35091ab14ada80a1b46fe6e Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:23:45 +0200
Subject: [PATCH 07/14] Add more comments

---
 package/samplers/user_prior_cmaes/README.md  | 64 +++-----------------
 package/samplers/user_prior_cmaes/sampler.py |  6 +-
 2 files changed, 12 insertions(+), 58 deletions(-)

diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
index d4e40702..812f5705 100644
--- a/package/samplers/user_prior_cmaes/README.md
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -1,76 +1,28 @@
 ---
-author: Please fill in the author name here. (e.g., John Smith)
-title: Please fill in the title of the feature here. (e.g., Gaussian-Process Expected Improvement Sampler)
-description: Please fill in the description of the feature here. (e.g., This sampler searches for each trial based on expected improvement using Gaussian process.)
-tags: [Please fill in the list of tags here. (e.g., sampler, visualization, pruner)]
-optuna_versions: ['Please fill in the list of versions of Optuna in which you have confirmed the feature works, e.g., 3.6.1.']
+author: Shuhei Watanabe
+title: CMA-ES with User Prior
+description: You can provide the initial parameters, i.e. mean vector and covariance matrix, for CMA-ES with this sampler.
+tags: [sampler, cma-es, meta-learning, prior]
+optuna_versions: [4.0.0]
 license: MIT License
 ---
 
-<!--
-This is an example of the frontmatters.
-All columns must be string.
-You can omit quotes when value types are not ambiguous.
-For tags, a package placed in
-- package/samplers/ must include the tag "sampler"
-- package/visualilzation/ must include the tag "visualization"
-- package/pruners/ must include the tag "pruner"
-respectively.
-
----
-author: Optuna team
-title: My Sampler
-description: A description for My Sampler.
-tags: [sampler, 2nd tag for My Sampler, 3rd tag for My Sampler]
-optuna_versions: [3.6.1]
-license: "MIT License"
----
--->
-
-Please read the [tutorial guide](https://optuna.github.io/optunahub-registry/recipes/001_first.html) to register your feature in OptunaHub.
-You can find more detailed explanation of the following contents in the tutorial.
-Looking at [other packages' implementations](https://github.com/optuna/optunahub-registry/tree/main/package) will also help you.
-
 ## Abstract
 
-You can provide an abstract for your package here.
-This section will help attract potential users to your package.
-
-**Example**
-
-This package provides a sampler based on Gaussian process-based Bayesian optimization. The sampler is highly sample-efficient, so it is suitable for computationally expensive optimization problems with a limited evaluation budget, such as hyperparameter optimization of machine learning algorithms.
+The Optuna CMA-ES sampler does not support any flexible way to initialize the parameters of the Gaussian distribution, so I created a workaround to do so.
 
 ## Class or Function Names
 
-Please fill in the class/function names which you implement here.
-
-**Example**
-
-- GPSampler
+- UserPriorCmaEsSampler
 
 ## Installation
 
-If you have additional dependencies, please fill in the installation guide here.
-If no additional dependencies is required, **this section can be removed**.
-
-**Example**
-
 ```shell
-$ pip install scipy torch
-```
-
-If your package has `requirements.txt`, it will be automatically uploaded to the OptunaHub, and the package dependencies will be available to install as follows.
-
-```shell
- pip install -r https://hub.optuna.org/{category}/{your_package_name}/requirements.txt
+$ pip install optunahub cmaes
 ```
 
 ## Example
 
-Please fill in the code snippet to use the implemented feature here.
-
-**Example**
-
 ```python
 import optuna
 import optunahub
diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 7995ed45..8af1cf5a 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -137,14 +137,16 @@ def _calculate_initial_params(
 
         mu0 = self._mu0.copy()
         mu0[is_single] = 0.5
+        # Linearly rescale mu0 into the [0, 1] scale (no clipping is applied).
         mu0[~is_single] = (mu0[~is_single] - raw_bounds[~is_single, 0]) / domain_sizes[~is_single]
 
+        # We also need to transform the covariance matrix accordingly to adapt to the [0, 1] scale.
         cov0 = self._cov0 / (domain_sizes * domain_sizes[:, np.newaxis])
+
+        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
         sigma0 = math.pow(np.linalg.det(cov0), 1.0 / 2.0 / dim)
         # Avoid ZeroDivisionError in cmaes.
         sigma0 = max(sigma0, 1e-10)
-
-        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
         cov0 /= sigma0**2
 
         return mu0, sigma0, cov0

From 5f8f7fc4e8426965f0c280244712ec2adeb911a3 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:39:46 +0200
Subject: [PATCH 08/14] Add examples

---
 package/samplers/user_prior_cmaes/README.md  | 65 +++++++++++++-------
 package/samplers/user_prior_cmaes/sampler.py |  4 +-
 2 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
index 812f5705..4cd1a97f 100644
--- a/package/samplers/user_prior_cmaes/README.md
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -23,40 +23,59 @@ $ pip install optunahub cmaes
 
 ## Example
 
+The simplest code example is as follows:
+
 ```python
+import numpy as np
 import optuna
 import optunahub
 
 
-def objective(trial):
-  x = trial.suggest_float("x", -5, 5)
-  return x**2
+def objective(trial: optuna.Trial) -> float:
+    x = trial.suggest_float("x", -50, -40)
+    y = trial.suggest_int("y", -5, 5)
+    return (x + 43)**2 + (y - 2)**2
+
 
+if __name__ == "__main__":
+    module = optunahub.load_module(package="samplers/user_prior_cmaes")
+    sampler = module.UserPriorCmaEsSampler(param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2.0, 0.2]))
+    study = optuna.create_study(sampler=sampler)
+    study.optimize(objective, n_trials=20)
+    print(study.best_trial.value, study.best_trial.params)
 
-sampler = optunahub.load_module(package="samplers/gp").GPSampler()
-study = optuna.create_study(sampler=sampler)
-study.optimize(objective, n_trials=100)
 ```
 
-## Others
+Although `UserPriorCmaEsSampler` does not support log-scale parameters directly, the following workaround achieves the same effect:
 
-Please fill in any other information if you have here by adding child sections (###).
-If there is no additional information, **this section can be removed**.
+```python
+import math
 
-<!--
-For example, you can add sections to introduce a corresponding paper.
+import numpy as np
+import optuna
+import optunahub
 
-### Reference
-Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. 2019.
-Optuna: A Next-generation Hyperparameter Optimization Framework. In KDD.
 
-### Bibtex
-```
-@inproceedings{optuna_2019,
-    title={Optuna: A Next-generation Hyperparameter Optimization Framework},
-    author={Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
-    booktitle={Proceedings of the 25th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
-    year={2019}
-}
+def objective(trial: optuna.Trial) -> float:
+    # For example, trial.suggest_float("x", 1e-5, 1.0, log=True) can be encoded as:
+    x = 10 ** trial.suggest_float("log10_x", -5, 0)
+    # trial.suggest_float("y", 2, 1024, log=True) can be encoded as:
+    y = 2 ** trial.suggest_float("log2_y", 1, 10)
+    # In general, trial.suggest_float("z", low, high, log=True) can be encoded as:
+    low, high = 3, 81
+    b = 3  # The base of log can be any positive number.
+    z = b ** trial.suggest_float("logb_z", math.log(low, b), math.log(high, b))
+    return x**2 + y**2 + z**2
+
+
+if __name__ == "__main__":
+    module = optunahub.load_module(package="samplers/user_prior_cmaes")
+    sampler = module.UserPriorCmaEsSampler(
+        param_names=["log10_x", "log2_y", "logb_z"],
+        mu0=np.array([-4, 8, 3]),
+        cov0=np.diag([0.2, 1., 0.1]),
+    )
+    study = optuna.create_study(sampler=sampler)
+    study.optimize(objective, n_trials=20)
+    print(study.best_trial.value, study.best_trial.params)
 ```
--->
diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 8af1cf5a..37766ea1 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -72,8 +72,8 @@ def __init__(
         )
         self._validate_user_prior(param_names, mu0, cov0)
         self._param_names = param_names[:]
-        self._mu0 = mu0.copy()
-        self._cov0 = cov0.copy()
+        self._mu0 = mu0.astype(float)
+        self._cov0 = cov0.astype(float)
 
     def _validate_user_prior(
         self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray

From 580a7b7bfb7b35ab2e8f402589a630bc96aabae9 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:49:29 +0200
Subject: [PATCH 09/14] Enhance README

---
 package/samplers/user_prior_cmaes/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
index 4cd1a97f..d22b4790 100644
--- a/package/samplers/user_prior_cmaes/README.md
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -15,6 +15,14 @@ As the Optuna CMA-ES sampler does not support any flexible ways to initialize th
 
 - UserPriorCmaEsSampler
 
+In principle, most arguments follow [`optuna.samplers.CmaEsSampler`](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html), but some parts are modified.
+
+For example, `UserPriorCmaEsSampler` does not support `source_trials` and `use_separable_cma` due to their incompatibility.
+In addition, `x0` and `sigma0` in `CmaEsSampler` are replaced with `mu0` and `cov0`.
+In `CmaEsSampler`, `x0` must be provided as a `dict` and `sigma0` only as a `float`.
+By additionally requiring `param_names`, `mu0` (previously `x0`) and `cov0` (previously `sigma0`) can now be given as `np.ndarray`.
+Note that the order of each dimension in `mu0` and `cov0` must be consistent with that in `param_names`.
+
 ## Installation
 
 ```shell

From 81624092765412671a86b42d2d415f7782134ac3 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:53:13 +0200
Subject: [PATCH 10/14] Add an error message

---
 package/samplers/user_prior_cmaes/sampler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 37766ea1..ac6573d7 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -103,7 +103,10 @@ def sample_relative(
         search_space: dict[str, BaseDistribution],
     ) -> dict[str, Any]:
         if len(search_space) != 0 and set(search_space.keys()) != set(self._param_names):
-            raise
+            raise ValueError(
+                "The keys in search_space and param_names did not match. "
+                "The most probable reason is duplicated names in param_names."
+            )
         elif len(search_space) != 0:
             search_space = {
                 param_name: search_space[param_name] for param_name in self._param_names

From baf5ff3584900263e2447ed44ee49cdcc5a8595e Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 30 Sep 2024 11:56:00 +0200
Subject: [PATCH 11/14] Add a message

---
 package/samplers/user_prior_cmaes/sampler.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index ac6573d7..183facdc 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -117,6 +117,10 @@ def sample_relative(
     def _calculate_initial_params(
         self, trans: _SearchSpaceTransform
     ) -> tuple[np.ndarray, float, np.ndarray]:
+        # NOTE(nabenabe): Except for this method, everything is basically based on Optuna v4.0.0.
+        # As this class does not support some cases supported by Optuna, I simply added validation
+        # to each method, but otherwise, nothing changed. In principle, if users find a bug, it is
+        # likely that the bug exists in this method.
         search_space = trans._search_space.copy()
         if any(
             not isinstance(d, (IntDistribution, FloatDistribution)) for d in search_space.values()

From 7d79b128c66b297a04b76ee378dd003d1f70c2be Mon Sep 17 00:00:00 2001
From: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com>
Date: Tue, 1 Oct 2024 02:34:34 +0900
Subject: [PATCH 12/14] Update package/samplers/user_prior_cmaes/sampler.py

---
 package/samplers/user_prior_cmaes/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 183facdc..5705aae7 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -26,7 +26,7 @@ class UserPriorCmaEsSampler(CmaEsSampler):
     Please check ``CmaEsSampler`` in Optuna for more details of each argument.
     This class modified the arguments ``x0`` and ``sigma0`` in ``CmaEsSampler`` of Optuna.
     Furthermore, due to the incompatibility,
-    This class does not support ``source_trials`` and ``use_separable_cma`` due to their incompatibility.
+    this class does not support ``source_trials`` and ``use_separable_cma``.
 
     Args:
         param_names:

From fd91c4c683fc32796e3901eeed2962b56c470c0f Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 1 Oct 2024 04:26:49 +0200
Subject: [PATCH 13/14] Fix the example

---
 package/samplers/user_prior_cmaes/README.md  | 2 +-
 package/samplers/user_prior_cmaes/sampler.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
index d22b4790..01e03663 100644
--- a/package/samplers/user_prior_cmaes/README.md
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -47,7 +47,7 @@ def objective(trial: optuna.Trial) -> float:
 
 if __name__ == "__main__":
     module = optunahub.load_module(package="samplers/user_prior_cmaes")
-    sampler = module.UserPriorCmaEsSampler(param_names=["x", "y"], mu0=np.array([3., -48.]), cov0=np.diag([0.2, 2.0]))
+    sampler = module.UserPriorCmaEsSampler(param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]))
     study = optuna.create_study(sampler=sampler)
     study.optimize(objective, n_trials=20)
     print(study.best_trial.value, study.best_trial.params)
diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
index 183facdc..5f6ce774 100644
--- a/package/samplers/user_prior_cmaes/sampler.py
+++ b/package/samplers/user_prior_cmaes/sampler.py
@@ -108,6 +108,7 @@ def sample_relative(
                 "The most probable reason is duplicated names in param_names."
             )
         elif len(search_space) != 0:
+            # Ensure the parameter order is identical to that in param_names.
             search_space = {
                 param_name: search_space[param_name] for param_name in self._param_names
             }

From 674bbed34a0430aa1972d3a1ecc9a5da99c69b5d Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Wed, 2 Oct 2024 08:12:26 +0200
Subject: [PATCH 14/14] Address c-bata's comment

---
 package/samplers/user_prior_cmaes/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/package/samplers/user_prior_cmaes/README.md b/package/samplers/user_prior_cmaes/README.md
index 01e03663..3019d902 100644
--- a/package/samplers/user_prior_cmaes/README.md
+++ b/package/samplers/user_prior_cmaes/README.md
@@ -47,7 +47,10 @@ def objective(trial: optuna.Trial) -> float:
 
 if __name__ == "__main__":
     module = optunahub.load_module(package="samplers/user_prior_cmaes")
-    sampler = module.UserPriorCmaEsSampler(param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]))
+    # ``with_margin=True`` because the search space has an integer parameter.
+    sampler = module.UserPriorCmaEsSampler(
+        param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]), with_margin=True
+    )
     study = optuna.create_study(sampler=sampler)
     study.optimize(objective, n_trials=20)
     print(study.best_trial.value, study.best_trial.params)