From 5f070249fb1048f4665a83efec061a929a464edf Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Wed, 13 Nov 2024 09:31:55 +0100
Subject: [PATCH] Add a draft of c-TPE

---
 package/samplers/ctpe/LICENSE             |  21 +++
 package/samplers/ctpe/README.md           | 111 +++++++++++++++
 package/samplers/ctpe/__init__.py         |   4 +
 package/samplers/ctpe/components.py       |  48 +++++++
 package/samplers/ctpe/parzen_estimator.py | 162 ++++++++++++++++++++++
 package/samplers/ctpe/requirements.txt    |   1 +
 package/samplers/ctpe/sampler.py          | 122 ++++++++++++++++
 7 files changed, 469 insertions(+)
 create mode 100644 package/samplers/ctpe/LICENSE
 create mode 100644 package/samplers/ctpe/README.md
 create mode 100644 package/samplers/ctpe/__init__.py
 create mode 100644 package/samplers/ctpe/components.py
 create mode 100644 package/samplers/ctpe/parzen_estimator.py
 create mode 100644 package/samplers/ctpe/requirements.txt
 create mode 100644 package/samplers/ctpe/sampler.py

diff --git a/package/samplers/ctpe/LICENSE b/package/samplers/ctpe/LICENSE
new file mode 100644
index 00000000..f6547d35
--- /dev/null
+++ b/package/samplers/ctpe/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Shuhei Watanabe
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package/samplers/ctpe/README.md b/package/samplers/ctpe/README.md
new file mode 100644
index 00000000..42582c22
--- /dev/null
+++ b/package/samplers/ctpe/README.md
@@ -0,0 +1,111 @@
+---
+author: Shuhei Watanabe
+title: c-TPE; Tree-structured Parzen Estimator with Inequality Constraints for Expensive Hyperparameter Optimization
+description: The optimizer that reproduces the algorithm described in the paper ``c-TPE; Tree-structured Parzen Estimator with Inequality Constraints for Expensive Hyperparameter Optimization''.
+tags: [sampler, tpe, c-tpe, paper, research]
+optuna_versions: [v4.1.0]
+license: MIT License
+---
+
+## Abstract
+
+This package aims to reproduce the TPE algorithm used in the paper published at IJCAI'23:
+
+- [c-TPE: Tree-structured Parzen Estimator with Inequality Constraints for Expensive Hyperparameter Optimization](https://arxiv.org/abs/2211.14411)
+
+The default parameter set of this sampler is the recommended setup from the paper and the experiments in the paper can also be reproduced by this sampler.
+
+## Class or Function Names
+
+- cTPESampler
+
+## Installation
+
+The version constraint of this package is Optuna v4.0.0 or later.
+
+```shell
+# The requirement is only Optuna.
+$ pip install optuna
+
+# You can also optionally install as follows:
+$ pip install -r https://hub.optuna.org/samplers/ctpe/requirements.txt
+```
+
+## Example
+
+TODO: Change here.
+
+This sampler supports the arguments discussed in [the original paper](https://arxiv.org/abs/2304.11127) and can be used in this way.
+
+```python
+import numpy as np
+
+import optuna
+
+import optunahub
+
+
+def objective(trial):
+    x = trial.suggest_float("x", -5, 5)
+    y = trial.suggest_int("y", -5, 5)
+    z = trial.suggest_categorical("z", ["a", "aa", "aaa"])
+    return len(z) * (x**2 + y**2)
+
+
+module = optunahub.load_module(package="samplers/tpe_tutorial")
+optuna.logging.set_verbosity(optuna.logging.CRITICAL)
+arg_choices = {
+    "consider_prior": [True, False],
+    "consider_magic_clip": [True, False],
+    "multivariate": [True, False],
+    "b_magic_exponent": [0.5, 1.0, 2.0, np.inf],
+    "min_bandwidth_factor": [0.01, 0.1],
+    "gamma_strategy": ["linear", "sqrt"],
+    "weight_strategy": ["uniform", "old-decay", "old-drop", "EI"],
+    "bandwidth_strategy": ["optuna", "hyperopt", "scott"],
+    "categorical_prior_weight": [0.1, None],
+}
+for arg_name, choices in arg_choices.items():
+    results = []
+    for choice in choices:
+        print(arg_name, choice)
+        sampler = module.CustomizableTPESampler(seed=0, **{arg_name: choice})
+        study = optuna.create_study(sampler=sampler)
+        study.optimize(objective, n_trials=100 if arg_name != "b_magic_exponent" else 200)
+        results.append(study.trials[-1].value)
+
+    print(f"Did every setup yield different results for {arg_name}?: {len(set(results)) == len(results)}")
+
+```
+
+In the paper, the following arguments, which do not exist in Optuna, were researched:
+
+- `gamma_strategy`: The splitting algorithm in Table 3. The choices are `linear` and `sqrt`.
+- `gamma_beta`: The beta parameter for the splitting algorithm in Table 3. This value must be positive.
+- `weight_strategy`: The weighting algorithm in Table 3. The choices are `uniform`, `old-decay`, `old-drop`, and `EI`.
+- `categorical_prior_weight`: The categorical bandwidth in Table 3. If `None`, the Optuna default algorithm will be used.
+- `bandwidth_strategy`: The bandwidth selection heuristic in Table 6. The choices are `optuna`, `hyperopt`, and `scott`.
+- `min_bandwidth_factor`: The minimum bandwidth factor in Table 6. This value must be positive.
+- `b_magic_exponent`: The exponent alpha in Table 6. Optuna takes 1.0 by default.
+
+For more details, please check [the paper](https://arxiv.org/abs/2304.11127).
+
+### Bibtex
+
+When you use this sampler, please cite the following:
+
+```bibtex
+@inproceedings{watanabe_ctpe_ijcai_2023,
+    title={{c-TPE}: Tree-structured {P}arzen Estimator with Inequality Constraints for Expensive Hyperparameter Optimization},
+    author={Watanabe, Shuhei and Hutter, Frank},
+    booktitle={International Joint Conference on Artificial Intelligence},
+    year={2023}
+}
+
+@inproceedings{watanabe_ctpe_workshop_2022,
+    title={{c-TPE}: Generalizing Tree-structured {P}arzen Estimator with Inequality Constraints for Continuous and Categorical Hyperparameter Optimization},
+    author={Watanabe, Shuhei and Hutter, Frank},
+    journal = {Gaussian Processes, Spatiotemporal Modeling, and Decision-making Systems Workshop at Advances in Neural Information Processing Systems},
+    year={2022}
+}
+```
diff --git a/package/samplers/ctpe/__init__.py b/package/samplers/ctpe/__init__.py
new file mode 100644
index 00000000..079ad70e
--- /dev/null
+++ b/package/samplers/ctpe/__init__.py
@@ -0,0 +1,4 @@
+from .sampler import cTPESampler
+
+
+__all__ = ["cTPESampler"]
diff --git a/package/samplers/ctpe/components.py b/package/samplers/ctpe/components.py
new file mode 100644
index 00000000..71a8d6d1
--- /dev/null
+++ b/package/samplers/ctpe/components.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import numpy as np
+
+
+class GammaFunc:
+    def __init__(self, strategy: str, beta: float):
+        strategy_choices = ["linear", "sqrt"]
+        if strategy not in strategy_choices:
+            raise ValueError(f"strategy must be in {strategy_choices}, but got {strategy}.")
+
+        self._strategy = strategy
+        self._beta = beta
+
+    def __call__(self, x: int) -> int:
+        if self._strategy == "linear":
+            n = int(np.ceil(self._beta * x))
+        elif self._strategy == "sqrt":
+            n = int(np.ceil(self._beta * np.sqrt(x)))
+        else:
+            assert "Should not reach."
+
+        return min(n, 25)
+
+
+class WeightFunc:
+    def __init__(self, strategy: str):
+        strategy_choices = ["old-decay", "old-drop", "uniform"]
+        if strategy not in strategy_choices:
+            raise ValueError(f"strategy must be in {strategy_choices}, but got {strategy}.")
+
+        self._strategy = strategy
+
+    def __call__(self, x: int) -> np.ndarray:
+        if x == 0:
+            return np.asarray([])
+        elif x < 25 or self._strategy == "uniform":
+            return np.ones(x)
+        elif self._strategy == "old-decay":
+            ramp = np.linspace(1.0 / x, 1.0, num=x - 25)
+            flat = np.ones(25)
+            return np.concatenate([ramp, flat], axis=0)
+        elif self._strategy == "old-drop":
+            weights = np.ones(x)
+            weights[:-25] = 1e-12
+            return weights
+        else:
+            assert "Should not reach."
diff --git a/package/samplers/ctpe/parzen_estimator.py b/package/samplers/ctpe/parzen_estimator.py
new file mode 100644
index 00000000..4bba2390
--- /dev/null
+++ b/package/samplers/ctpe/parzen_estimator.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import NamedTuple
+
+import numpy as np
+from optuna.distributions import CategoricalDistribution
+from optuna.samplers._tpe.parzen_estimator import _ParzenEstimator
+from optuna.samplers._tpe.probability_distributions import _BatchedCategoricalDistributions
+from optuna.samplers._tpe.probability_distributions import _BatchedDiscreteTruncNormDistributions
+from optuna.samplers._tpe.probability_distributions import _BatchedDistributions
+from optuna.samplers._tpe.probability_distributions import _BatchedTruncNormDistributions
+
+
+class _CustomizableParzenEstimatorParameters(NamedTuple):
+    consider_prior: bool
+    prior_weight: float | None
+    consider_magic_clip: bool
+    weights: Callable[[int], np.ndarray]
+    multivariate: bool
+    b_magic_exponent: float
+    min_bandwidth_factor: float
+    bandwidth_strategy: str
+    categorical_prior_weight: float | None
+
+
+def _bandwidth_hyperopt(
+    mus: np.ndarray,
+    low: float,
+    high: float,
+    step: float | None,
+) -> np.ndarray:
+    step_or_0 = step or 0
+    sorted_indices = np.argsort(mus)
+    sorted_mus_with_endpoints = np.empty(len(mus) + 2, dtype=float)
+    sorted_mus_with_endpoints[0] = low - step_or_0 / 2
+    sorted_mus_with_endpoints[1:-1] = mus[sorted_indices]
+    sorted_mus_with_endpoints[-1] = high + step_or_0 / 2
+    sorted_sigmas = np.maximum(
+        sorted_mus_with_endpoints[1:-1] - sorted_mus_with_endpoints[0:-2],
+        sorted_mus_with_endpoints[2:] - sorted_mus_with_endpoints[1:-1],
+    )
+    return sorted_sigmas[np.argsort(sorted_indices)]
+
+
+def _bandwidth_optuna(
+    n_observations: int,
+    consider_prior: bool,
+    domain_range: float,
+    dim: int,
+) -> np.ndarray:
+    SIGMA0_MAGNITUDE = 0.2
+    sigma = SIGMA0_MAGNITUDE * max(n_observations, 1) ** (-1.0 / (dim + 4)) * domain_range
+    return np.full(shape=(n_observations + consider_prior,), fill_value=sigma)
+
+
+def _bandwidth_scott(mus: np.ndarray) -> np.ndarray:
+    std = np.std(mus, ddof=int(mus.size > 1))
+    IQR = np.subtract.reduce(np.percentile(mus, [75, 25]))
+    return np.full_like(mus, 1.059 * min(IQR / 1.34, std) * mus.size**-0.2)
+
+
+def _clip_bandwidth(
+    sigmas: np.ndarray,
+    n_observations: int,
+    domain_range: float,
+    consider_prior: bool,
+    consider_magic_clip: bool,
+    b_magic_exponent: float,
+    min_bandwidth_factor: float,
+) -> np.ndarray:
+    # We adjust the range of the 'sigmas' according to the 'consider_magic_clip' flag.
+    maxsigma = 1.0 * domain_range
+    if consider_magic_clip:
+        bandwidth_factor = max(
+            min_bandwidth_factor, 1.0 / (1 + n_observations + consider_prior) ** b_magic_exponent
+        )
+        minsigma = bandwidth_factor * domain_range
+    else:
+        minsigma = 1e-12
+
+    clipped_sigmas = np.asarray(np.clip(sigmas, minsigma, maxsigma))
+    if consider_prior:
+        clipped_sigmas[-1] = maxsigma
+
+    return clipped_sigmas
+
+
+class _CustomizableParzenEstimator(_ParzenEstimator):
+    def _calculate_numerical_distributions(
+        self,
+        observations: np.ndarray,
+        low: float,
+        high: float,
+        step: float | None,
+        parameters: _CustomizableParzenEstimatorParameters,
+    ) -> _BatchedDistributions:
+        domain_range = high - low + (step or 0)
+        consider_prior = parameters.consider_prior or len(observations) == 0
+
+        if consider_prior:
+            mus = np.append(observations, [0.5 * (low + high)])
+        else:
+            mus = observations.copy()
+
+        if parameters.bandwidth_strategy == "hyperopt":
+            sigmas = _bandwidth_hyperopt(mus, low, high, step)
+        elif parameters.bandwidth_strategy == "optuna":
+            sigmas = _bandwidth_optuna(
+                n_observations=len(observations),
+                consider_prior=consider_prior,
+                domain_range=domain_range,
+                dim=len(self._search_space),
+            )
+        elif parameters.bandwidth_strategy == "scott":
+            sigmas = _bandwidth_scott(mus)
+        else:
+            raise ValueError(f"Got unknown bandwidth_strategy={parameters.bandwidth_strategy}.")
+
+        sigmas = _clip_bandwidth(
+            sigmas=sigmas,
+            n_observations=len(observations),
+            domain_range=domain_range,
+            consider_magic_clip=parameters.consider_magic_clip,
+            consider_prior=consider_prior,
+            b_magic_exponent=parameters.b_magic_exponent,
+            min_bandwidth_factor=parameters.min_bandwidth_factor,
+        )
+
+        if step is None:
+            return _BatchedTruncNormDistributions(mus, sigmas, low, high)
+        else:
+            return _BatchedDiscreteTruncNormDistributions(mus, sigmas, low, high, step)
+
+    def _calculate_categorical_distributions(
+        self,
+        observations: np.ndarray,
+        param_name: str,
+        search_space: CategoricalDistribution,
+        parameters: _CustomizableParzenEstimatorParameters,
+    ) -> _BatchedDistributions:
+        choices = search_space.choices
+        n_choices = len(choices)
+        if len(observations) == 0:
+            return _BatchedCategoricalDistributions(
+                weights=np.full((1, n_choices), fill_value=1.0 / n_choices)
+            )
+
+        n_kernels = len(observations) + parameters.consider_prior
+        observed_indices = observations.astype(int)
+        if parameters.categorical_prior_weight is None:
+            weights = np.full(shape=(n_kernels, n_choices), fill_value=1.0 / n_kernels)
+            weights[np.arange(len(observed_indices)), observed_indices] += 1
+            weights /= weights.sum(axis=1, keepdims=True)
+        else:
+            assert 0 <= parameters.categorical_prior_weight <= 1
+            b = parameters.categorical_prior_weight
+            weights = np.full(shape=(n_kernels, n_choices), fill_value=b / (n_choices - 1))
+            weights[np.arange(len(observed_indices)), observed_indices] = 1 - b
+            weights[-1] = 1.0 / n_choices
+
+        return _BatchedCategoricalDistributions(weights)
diff --git a/package/samplers/ctpe/requirements.txt b/package/samplers/ctpe/requirements.txt
new file mode 100644
index 00000000..afa8210b
--- /dev/null
+++ b/package/samplers/ctpe/requirements.txt
@@ -0,0 +1 @@
+optuna>=4.0.0
diff --git a/package/samplers/ctpe/sampler.py b/package/samplers/ctpe/sampler.py
new file mode 100644
index 00000000..ee3f436c
--- /dev/null
+++ b/package/samplers/ctpe/sampler.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from collections.abc import Sequence
+from typing import Any
+
+import numpy as np
+from optuna.distributions import BaseDistribution
+from optuna.logging import get_logger
+from optuna.samplers import TPESampler
+from optuna.samplers._tpe.parzen_estimator import _ParzenEstimator
+from optuna.study import Study
+from optuna.trial import FrozenTrial
+from optuna.trial import TrialState
+
+from .components import GammaFunc
+from .components import WeightFunc
+from .parzen_estimator import _CustomizableParzenEstimator
+from .parzen_estimator import _CustomizableParzenEstimatorParameters
+
+
+_logger = get_logger(f"optuna.{__name__}")
+
+
+def _split_trials(
+    study: Study, trials: list[FrozenTrial], n_below: int, enable_constriants: bool
+) -> tuple[list[FrozenTrial], list[FrozenTrial]]:
+    return [], []
+
+
+class cTPESampler(TPESampler):
+    def __init__(
+        self,
+        *,
+        consider_prior: bool = True,
+        prior_weight: float = 1.0,
+        consider_magic_clip: bool = True,
+        n_startup_trials: int = 10,
+        n_ei_candidates: int = 24,
+        seed: int | None = None,
+        categorical_prior_weight: float | None = 0.2,
+        multivariate: bool = True,
+        b_magic_exponent: float = np.inf,
+        min_bandwidth_factor: float = 0.01,
+        gamma_strategy: str = "sqrt",
+        gamma_beta: float = 0.25,
+        weight_strategy: str = "uniform",
+        bandwidth_strategy: str = "scott",
+        constraints_func: Callable[[FrozenTrial], Sequence[float]] | None = None,
+    ):
+        gamma = GammaFunc(strategy=gamma_strategy, beta=gamma_beta)
+        weights = WeightFunc(strategy=weight_strategy)
+        super().__init__(
+            consider_prior=consider_prior,
+            prior_weight=prior_weight,
+            consider_magic_clip=consider_magic_clip,
+            consider_endpoints=True,
+            warn_independent_sampling=False,
+            n_startup_trials=n_startup_trials,
+            n_ei_candidates=n_ei_candidates,
+            gamma=gamma,
+            weights=weights,
+            seed=seed,
+            multivariate=multivariate,
+        )
+        self._parzen_estimator_cls = _CustomizableParzenEstimator
+        self._parzen_estimator_parameters = _CustomizableParzenEstimatorParameters(
+            consider_prior=consider_prior,
+            prior_weight=prior_weight,
+            consider_magic_clip=consider_magic_clip,
+            weights=weights,
+            multivariate=multivariate,
+            b_magic_exponent=b_magic_exponent,
+            min_bandwidth_factor=min_bandwidth_factor,
+            bandwidth_strategy=bandwidth_strategy,
+            categorical_prior_weight=categorical_prior_weight,
+        )
+        self._weight_strategy = weight_strategy
+
+    def _sample(
+        self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
+    ) -> dict[str, Any]:
+        if study._is_multi_objective():
+            _logger.warning("Multi-objective c-TPE does not exist in the original paper.")
+
+        trials = study._get_trials(deepcopy=False, states=(TrialState.COMPLETE,), use_cache=True)
+
+        # We divide data into below and above.
+        n_trials = len(trials)
+        below_trials, above_trials = _split_trials(
+            study,
+            trials,
+            self._gamma(n_trials),
+            self._constraints_func is not None,
+        )
+
+        mpe_below = self._build_parzen_estimator(
+            study, search_space, below_trials, handle_below=True
+        )
+        mpe_above = self._build_parzen_estimator(
+            study, search_space, above_trials, handle_below=False
+        )
+
+        samples_below = mpe_below.sample(self._rng.rng, self._n_ei_candidates)
+        acq_func_vals = self._compute_acquisition_func(samples_below, mpe_below, mpe_above)
+        ret = TPESampler._compare(samples_below, acq_func_vals)
+
+        for param_name, dist in search_space.items():
+            ret[param_name] = dist.to_external_repr(ret[param_name])
+
+        return ret
+
+    def _compute_acquisition_func(
+        self,
+        samples: dict[str, np.ndarray],
+        mpe_below: _ParzenEstimator,
+        mpe_above: _ParzenEstimator,
+    ) -> np.ndarray:
+        log_likelihoods_below = mpe_below.log_pdf(samples)
+        log_likelihoods_above = mpe_above.log_pdf(samples)
+        acq_func_vals = log_likelihoods_below - log_likelihoods_above
+        return acq_func_vals