Merge remote-tracking branch 'upstream/main' into add-hpolib

optuna · Jan 16, 2025 · 48b1534 · 48b1534
2 parents c5f60a1 + d806762
commit 48b1534
Show file tree

Hide file tree

Showing 8 changed files with 200 additions and 3 deletions.
diff --git a/package/benchmarks/hpobench_nn/LICENSE b/package/benchmarks/hpobench_nn/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Preferred Networks, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package/benchmarks/hpobench_nn/README.md b/package/benchmarks/hpobench_nn/README.md
@@ -0,0 +1,90 @@
+---
+author: Optuna Team
+title: HPOBench; A Collection of Reproducible Multi-Fidelity Benchmark Problems for HPO
+description: The hyperparameter optimization benchmark datasets introduced in the paper "HPOBench; A Collection of Reproducible Multi-Fidelity Benchmark Problems for HPO"
+tags: [benchmark, HPO, NAS, AutoML, hyperparameter optimization, real world problem]
+optuna_versions: [4.1.0]
+license: MIT License
+---
+
+## Abstract
+
+Hyperparameter optimization benchmark introduced in the paper [`HPOBench: A Collection of Reproducible Multi-Fidelity Benchmark Problems for HPO`](https://arxiv.org/abs/2109.06716).
+The original benchmark is available [here](https://github.com/automl/hpobench).
+Please note that this benchmark provides the results only at the last epoch of each configuration.
+
+## APIs
+
+### class `Problem(dataset_id: int, seed: int | None = None, metric_names: list[str] | None = None)`
+
+- `dataset_id`: ID of the dataset to use. It must be in the range of `[0, 7]`. Please use `Problem.available_dataset_names` to see the available dataset names.
+- `seed`: The seed for the random number generator of the dataset.
+- `metric_names`: The metrics to use in optimization. Defaults to `None`, leading to single-objective optimization of the main metric defined [here](https://github.com/nabenabe0928/simple-hpo-bench/blob/v0.2.0/hpo_benchmarks/hpolib.py#L16). Please use `Problem.available_metric_names` to see the available metric names.
+
+#### Methods and Properties
+
+- `search_space`: Return the search space.
+  - Returns: `dict[str, optuna.distributions.BaseDistribution]`
+- `directions`: Return the optimization directions.
+  - Returns: `list[optuna.study.StudyDirection]`
+- `metric_names`: The names of the metrics to be used in the optimization.
+  - Returns: `list[str]`
+- `available_metric_names`: The names of the available metrics.
+  - Returns: `list[str]`
+- `available_dataset_names`: The names of the available datasets.
+  - Returns: `list[str]`
+- `__call__(trial: optuna.Trial)`: Evaluate the objective functions and return the objective values.
+  - Args:
+    - `trial`: Optuna trial object.
+  - Returns: `list[float]`
+- `evaluate(params: dict[str, int | float | str])`: Evaluate the objective function given a dictionary of parameters.
+  - Args:
+    - `params`: A dictionary that maps each parameter name in `search_space` (i.e., the names suffixed with `_index`) to the index of the chosen value.
+  - Returns: `list[float]`
+- `reseed(seed: int | None = None)`: Recreate the random number generator with the given seed.
+  - Args:
+    - `seed`: The seed to be used.
+
+## Installation
+
+To use this benchmark, you need to install `simple-hpo-bench`.
+
+```shell
+$ pip install simple-hpo-bench
+```
+
+## Example
+
+```python
+from __future__ import annotations
+
+import optuna
+import optunahub
+
+
+hpobench = optunahub.load_module("benchmarks/hpobench_nn")
+problem = hpobench.Problem(dataset_id=0)
+# Pass the problem's own directions; `create_study()` alone would always minimize.
+study = optuna.create_study(directions=problem.directions)
+study.optimize(problem, n_trials=30)
+print(study.best_trial)
+
+```
+
+## Others
+
+### Reference
+
+This benchmark was originally introduced by [AutoML.org](https://github.com/automl/hpobench), but our backend relies on [`simple-hpo-bench`](https://github.com/nabenabe0928/simple-hpo-bench/).
+
+### Bibtex
+
+```bibtex
+@inproceedings{
+  eggensperger2021hpobench,
+  title={{HPOB}ench: A Collection of Reproducible Multi-Fidelity Benchmark Problems for {HPO}},
+  author={Katharina Eggensperger and Philipp M{\"u}ller and Neeratyoy Mallik and Matthias Feurer and Rene Sass and Aaron Klein and Noor Awad and Marius Lindauer and Frank Hutter},
+  booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
+  year={2021},
+  url={https://openreview.net/forum?id=1k4rJYEwda-}
+}
+```
diff --git a/package/benchmarks/hpobench_nn/__init__.py b/package/benchmarks/hpobench_nn/__init__.py
@@ -0,0 +1,4 @@
+from ._hpobench import Problem
+
+
+__all__ = ["Problem"]
diff --git a/package/benchmarks/hpobench_nn/_hpobench.py b/package/benchmarks/hpobench_nn/_hpobench.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from hpo_benchmarks import HPOBench
+import optuna
+import optunahub
+
+
+_INDEX_SUFFIX = "_index"
+_DIRECTIONS = {
+    "minimize": optuna.study.StudyDirection.MINIMIZE,
+    "maximize": optuna.study.StudyDirection.MAXIMIZE,
+}
+
+
+def _extract_search_space(bench: HPOBench) -> dict[str, optuna.distributions.BaseDistribution]:
+    """Build an Optuna search space over the choice indices of ``bench``.
+
+    Every entry ``name -> choices`` of ``bench.search_space`` is exposed under
+    the key ``f"{name}{_INDEX_SUFFIX}"``. A value of that parameter is a
+    position in ``choices``: parameters whose entry in ``bench.param_types``
+    equals ``str`` become categorical over the positions, all others become an
+    integer range from ``0`` to ``len(choices) - 1``.
+    """
+    distributions = optuna.distributions
+    types_by_name = bench.param_types
+    index_space = {}
+    for name, candidates in bench.search_space.items():
+        last_index = len(candidates) - 1
+        index_key = f"{name}{_INDEX_SUFFIX}"
+        if types_by_name[name] == str:
+            index_dist = distributions.CategoricalDistribution(list(range(last_index + 1)))
+        else:
+            index_dist = distributions.IntDistribution(low=0, high=last_index)
+        index_space[index_key] = index_dist
+    return index_space
+
+
+class Problem(optunahub.benchmarks.BaseProblem):
+    """Benchmark problem that wraps ``hpo_benchmarks.HPOBench``.
+
+    Parameters are optimized through the index of their choice: the search
+    space exposes ``f"{name}{_INDEX_SUFFIX}"`` keys, and ``evaluate`` maps the
+    indices back to the actual values before querying the wrapped benchmark.
+
+    Args:
+        dataset_id: Position of the dataset in ``available_dataset_names``.
+        seed: Forwarded to ``HPOBench`` as ``seed``.
+        metric_names: Forwarded to ``HPOBench`` as ``metric_names``.
+
+    Raises:
+        ValueError: If ``dataset_id`` is not a valid position in
+            ``available_dataset_names``.
+    """
+
+    available_metric_names: list[str] = HPOBench.available_metric_names
+    # Each entry is forwarded to ``HPOBench(dataset_name=...)``.
+    # NOTE(review): presumably strings (dataset names) -- confirm against simple-hpo-bench.
+    available_dataset_names: list[str] = HPOBench.available_dataset_names
+
+    def __init__(
+        self, dataset_id: int, seed: int | None = None, metric_names: list[str] | None = None
+    ):
+        n_datasets = len(self.available_dataset_names)
+        if dataset_id < 0 or dataset_id >= n_datasets:
+            raise ValueError(
+                f"dataset_id must be between 0 and {n_datasets - 1}, but got {dataset_id}."
+            )
+
+        self.dataset_name = self.available_dataset_names[dataset_id]
+        self._problem = HPOBench(
+            dataset_name=self.dataset_name, seed=seed, metric_names=metric_names
+        )
+        self._search_space = _extract_search_space(self._problem)
+
+    @property
+    def search_space(self) -> dict[str, optuna.distributions.BaseDistribution]:
+        """Return a shallow copy of the index-based search space."""
+        return self._search_space.copy()
+
+    @property
+    def directions(self) -> list[optuna.study.StudyDirection]:
+        """Return the Optuna direction of each metric, in ``metric_names`` order."""
+        return [_DIRECTIONS[self._problem.directions[name]] for name in self.metric_names]
+
+    def evaluate(self, params: dict[str, int | float | str]) -> list[float]:
+        """Evaluate the wrapped benchmark for the given choice indices.
+
+        Args:
+            params: Maps each ``f"{name}{_INDEX_SUFFIX}"`` key to the index of
+                the chosen value in the benchmark's choices for ``name``.
+
+        Returns:
+            The results for each metric, in ``metric_names`` order.
+
+        Raises:
+            KeyError: If a key does not end with ``_INDEX_SUFFIX`` or does not
+                correspond to a parameter of the wrapped benchmark.
+        """
+        problem_search_space = self._problem.search_space
+        len_suffix = len(_INDEX_SUFFIX)
+        modified_params = {}
+        for index_name, choice_index in params.items():
+            if not index_name.endswith(_INDEX_SUFFIX):
+                # Slicing blindly would look up a truncated, unrelated name instead.
+                raise KeyError(
+                    f"Parameter names must end with '{_INDEX_SUFFIX}', but got '{index_name}'."
+                )
+            param_name = index_name[:-len_suffix]
+            modified_params[param_name] = problem_search_space[param_name][choice_index]
+
+        results = self._problem(modified_params)
+        return [results[name] for name in self.metric_names]
+
+    def reseed(self, seed: int | None = None) -> None:
+        """Delegate reseeding to the wrapped benchmark."""
+        self._problem.reseed(seed)
+
+    @property
+    def metric_names(self) -> list[str]:
+        """Return the metric names reported by the wrapped benchmark."""
+        return self._problem.metric_names
diff --git a/package/benchmarks/hpobench_nn/example.py b/package/benchmarks/hpobench_nn/example.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import optuna
+import optunahub
+
+
+hpobench = optunahub.load_module("benchmarks/hpobench_nn")
+problem = hpobench.Problem(dataset_id=0)
+# Pass the problem's own directions; `create_study()` alone would always minimize.
+study = optuna.create_study(directions=problem.directions)
+study.optimize(problem, n_trials=30)
+print(study.best_trial)
diff --git a/package/benchmarks/hpobench_nn/requirements.txt b/package/benchmarks/hpobench_nn/requirements.txt
@@ -0,0 +1 @@
+simple-hpo-bench
diff --git a/package/samplers/user_prior_cmaes/sampler.py b/package/samplers/user_prior_cmaes/sampler.py
@@ -91,8 +91,6 @@ def _validate_user_prior(
             )
         if not np.allclose(cov0, cov0.T):
             raise ValueError("cov0 must be a symmetric matrix.")
-        if np.any(cov0 < 0.0):
-            raise ValueError("All elements in cov0 must be non-negative.")
         if np.any(np.linalg.eigvals(cov0) < 0.0):
             raise ValueError("cov0 must be a semi-positive definite matrix.")
 

diff --git a/template/example.py b/template/example.py
@@ -24,7 +24,7 @@ def objective(trial: optuna.Trial) -> float:
     # This is an example of how to load a sampler from your local optunahub-registry.
     sampler = optunahub.load_local_module(
         package=package_name,
-        registry_root="./",  # Path to the root of the optunahub-registry.
+        registry_root="./package",  # Path to the root of the optunahub-registry.
     ).YourSampler()
 else:
     # This is an example of how to load a sampler from your fork of the optunahub-registry.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		from ._hpobench import Problem


		__all__ = ["Problem"]