
Commit

Merge main
nabenabe0928 committed Dec 13, 2024
2 parents fedd54f + 99476d8 commit 0ed7272
Showing 18 changed files with 463 additions and 43 deletions.
22 changes: 21 additions & 1 deletion README.md
@@ -1,11 +1,31 @@
# OptunaHub Registry
OptunaHub Registry
==================

![OptunaHub](https://github.com/user-attachments/assets/ee24b6eb-a431-4e02-ae52-c2538ffe01ee)

:link: [**OptunaHub**](https://hub.optuna.org/)
| :page_with_curl: [**Docs**](https://optuna.github.io/optunahub/)
| :page_with_curl: [**Tutorials**](https://optuna.github.io/optunahub-registry/)
| [**Optuna.org**](https://optuna.org/)

OptunaHub Registry is a registry service for sharing and discovering user-defined Optuna packages. It provides a platform where users can share their own Optuna packages and discover useful packages created by others.

See the [OptunaHub Website](https://hub.optuna.org/) for registered packages.

See also the [OptunaHub API documentation](https://optuna.github.io/optunahub/) for the API used to access the registry, and the [OptunaHub tutorial](https://optuna.github.io/optunahub-registry/) for how to register and discover packages.
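
As a minimal sketch of how a registered package is used (assuming the dependencies of the `samplers/hebo` package are installed as described in its README), a package can be loaded by its registry path via `optunahub.load_module`:

```python
import optuna
import optunahub


def objective(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    return x**2


# Load a registered package by its path on OptunaHub.
module = optunahub.load_module("samplers/hebo")
study = optuna.create_study(sampler=module.HEBOSampler())
study.optimize(objective, n_trials=20)
print(study.best_trial.params)
```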

## Contribution

Any contributions to OptunaHub are more than welcome!

OptunaHub is composed of the following three related repositories. Please contribute to the repository that matches your purpose.
- [optunahub](https://github.com/optuna/optunahub)
  - The Python library for using OptunaHub. If you find issues and/or bugs in the optunahub library, please report them via [GitHub issues](https://github.com/optuna/optunahub/issues).
- [optunahub-registry](https://github.com/optuna/optunahub-registry/) (*this repository*)
  - The registry of OptunaHub packages. If you would like to register your package with OptunaHub, please contribute to this repository. For general guidelines on how to contribute, take a look at [CONTRIBUTING.md](https://github.com/optuna/optunahub-registry/blob/main/CONTRIBUTING.md).
- [optunahub-web](https://github.com/optuna/optunahub-web/)
  - The web frontend for OptunaHub. If you find issues and/or bugs on the website, please report them via [GitHub issues](https://github.com/optuna/optunahub-web/issues).

## Quick TODO List towards Contribution

When creating your package, please check the following TODO list:
2 changes: 2 additions & 0 deletions docs/source/index.rst
@@ -14,6 +14,8 @@ Recipes
- :doc:`recipes/003_pruner`
- :doc:`recipes/004_visualization`
- :doc:`recipes/005_debugging`
- :doc:`recipes/006_benchmarks_basic`
- :doc:`recipes/007_benchmarks_advanced`


License
14 changes: 14 additions & 0 deletions package/samplers/cmamae/README.md
@@ -27,6 +27,10 @@ with improvement ranking, all wrapped up in a
However, it is possible to implement many variations of CMA-MAE and other
quality diversity algorithms using pyribs.

To visualize the results of the `CmaMaeSampler`, we use the
`plot_grid_archive_heatmap` function from the
[`plot_pyribs`](https://hub.optuna.org/visualization/plot_pyribs/) plugin.

## Class or Function Names

- CmaMaeSampler
@@ -46,12 +50,17 @@ $ pip install ribs
## Example

```python
import matplotlib.pyplot as plt
import optuna
import optunahub


module = optunahub.load_module("samplers/cmamae")
CmaMaeSampler = module.CmaMaeSampler

plot_pyribs = optunahub.load_module(package="visualization/plot_pyribs")
plot_grid_archive_heatmap = plot_pyribs.plot_grid_archive_heatmap


def objective(trial: optuna.trial.Trial) -> float:
"""Returns an objective followed by two measures."""
@@ -80,6 +89,11 @@ if __name__ == "__main__":
)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=10000)

fig, ax = plt.subplots(figsize=(8, 6))
plot_grid_archive_heatmap(study, ax=ax)
plt.savefig("archive.png")
plt.show()
```

## Others
9 changes: 9 additions & 0 deletions package/samplers/cmamae/example.py
@@ -1,10 +1,14 @@
import matplotlib.pyplot as plt
import optuna
import optunahub


module = optunahub.load_module("samplers/cmamae")
CmaMaeSampler = module.CmaMaeSampler

plot_pyribs = optunahub.load_module(package="visualization/plot_pyribs")
plot_grid_archive_heatmap = plot_pyribs.plot_grid_archive_heatmap


def objective(trial: optuna.trial.Trial) -> float:
"""Returns an objective followed by two measures."""
@@ -33,3 +37,8 @@ def objective(trial: optuna.trial.Trial) -> float:
)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=10000)

fig, ax = plt.subplots(figsize=(8, 6))
plot_grid_archive_heatmap(study, ax=ax)
plt.savefig("archive.png")
plt.show()
29 changes: 20 additions & 9 deletions package/samplers/hebo/README.md
@@ -14,10 +14,12 @@ license: MIT License
## Installation

```bash
pip install -r https://hub.optuna.org/samplers/hebo/requirements.txt
git clone git@github.com:huawei-noah/HEBO.git
cd HEBO/HEBO
pip install -e .
# Install the dependencies.
pip install optunahub hebo

# NOTE: The following is optional, but pymoo must be installed after NumPy for a faster HEBOSampler.
# Run the command below to make sure that the compiled version of pymoo is installed.
pip install --upgrade pymoo
```

## APIs
@@ -59,11 +61,7 @@ def objective(trial: optuna.trial.Trial) -> float:


module = optunahub.load_module("samplers/hebo")
sampler = module.HEBOSampler(search_space={
"x": optuna.distributions.FloatDistribution(-10, 10),
"y": optuna.distributions.IntDistribution(-10, 10),
})
# sampler = module.HEBOSampler() # Note: `search_space` is not required, and thus it works too.
sampler = module.HEBOSampler()
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=100)

@@ -73,6 +71,19 @@ print(study.best_trial.params, study.best_trial.value)
See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/hebo/example.py) for a full example.
![History Plot](images/hebo_optimization_history.png "History Plot")

Note that passing the search space directly to `HEBOSampler` may slightly speed up the sampling routine, since Optuna can skip search space inference.
For example, the instantiation of `HEBOSampler` above can be modified as follows:

```python
search_space = {
    "x": optuna.distributions.FloatDistribution(-10, 10),
    "y": optuna.distributions.IntDistribution(-10, 10),
}
sampler = module.HEBOSampler(search_space=search_space)
```

However, users need to make sure that the provided search space is consistent with the search space defined in the objective function.

## Others

HEBO is the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard).
2 changes: 1 addition & 1 deletion package/samplers/hebo/requirements.txt
@@ -1,3 +1,3 @@
optuna
optunahub
hebo@git+https://github.com/huawei-noah/[email protected]#subdirectory=HEBO
hebo
26 changes: 13 additions & 13 deletions package/samplers/hebo/sampler.py
@@ -2,14 +2,14 @@

from collections.abc import Sequence
from typing import Any
import warnings

import numpy as np
import optuna
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.distributions import FloatDistribution
from optuna.distributions import IntDistribution
from optuna.logging import get_logger
from optuna.samplers import BaseSampler
from optuna.search_space import IntersectionSearchSpace
from optuna.study import Study
@@ -23,6 +23,9 @@
from hebo.optimizers.hebo import HEBO


_logger = get_logger(f"optuna.{__name__}")


class HEBOSampler(optunahub.samplers.SimpleBaseSampler):
"""A sampler using `HEBO <https://github.com/huawei-noah/HEBO/tree/master/HEBO>__` as the backend.
@@ -85,7 +88,6 @@ def __init__(
self._hebo = None
self._intersection_search_space = IntersectionSearchSpace()
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
self._is_independent_sample_necessary = False
self._constant_liar = constant_liar
self._rng = np.random.default_rng(seed)

@@ -113,12 +115,12 @@ def _suggest_and_transform_to_dict(

def _sample_relative_define_and_run(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
return self._suggest_and_transform_to_dict(self._hebo, search_space)

def _sample_relative_stateless(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if self._constant_liar:
target_states = [TrialState.COMPLETE, TrialState.RUNNING]
else:
@@ -131,10 +133,8 @@ def _sample_relative_stateless(
# note: The backend HEBO implementation uses Sobol sampling here.
# This sampler does not call `hebo.suggest()` here because
# Optuna needs to know the search space by running the first trial in Define-by-Run.
self._is_independent_sample_necessary = True
return {}
else:
self._is_independent_sample_necessary = False

trials = [t for t in trials if set(search_space.keys()) <= set(t.params.keys())]

# Assume that the back-end HEBO implementation aims to minimize.
@@ -149,12 +149,12 @@
params = pd.DataFrame([t.params for t in trials])
values[np.isnan(values)] = worst_value
values *= sign
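# HEBO's `observe` expects the objective values as a 2D array of shape (len(trials), 1), hence the added axis below.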
hebo.observe(params, values)
hebo.observe(params, values[:, np.newaxis])
return self._suggest_and_transform_to_dict(hebo, search_space)

def sample_relative(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, float]:
) -> dict[str, Any]:
if study._is_multi_objective():
raise ValueError(
f"{self.__class__.__name__} has not supported multi-objective optimization."
@@ -226,10 +226,10 @@ def sample_independent(
param_name: str,
param_distribution: BaseDistribution,
) -> Any:
if not self._is_independent_sample_necessary:
warnings.warn(
"`HEBOSampler` falls back to `RandomSampler` due to dynamic search space."
)
states = (TrialState.COMPLETE,)
trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
if any(param_name in trial.params for trial in trials):
_logger.warning(f"Using `RandomSampler` for {param_name} due to dynamic search space.")

return self._independent_sampler.sample_independent(
study, trial, param_name, param_distribution
26 changes: 24 additions & 2 deletions package/samplers/smac_sampler/README.md
@@ -7,9 +7,30 @@ optuna_versions: [3.6.1]
license: MIT License
---

## Class or Function Names
## APIs

- SMACSampler
A sampler that uses SMAC3 v2.2.0.

Please check the API reference for more details:

- https://automl.github.io/SMAC3/main/5_api.html

### `SMACSampler(search_space: dict[str, BaseDistribution], n_trials: int = 100, seed: int | None = None, *, surrogate_model_type: str = "rf", acq_func_type: str = "ei_log", init_design_type: str = "sobol", surrogate_model_rf_num_trees: int = 10, surrogate_model_rf_ratio_features: float = 1.0, surrogate_model_rf_min_samples_split: int = 2, surrogate_model_rf_min_samples_leaf: int = 1, init_design_n_configs: int | None = None, init_design_n_configs_per_hyperparameter: int = 10, init_design_max_ratio: float = 0.25, output_directory: str = "smac3_output")`

- `search_space`: A dictionary of Optuna distributions.
- `n_trials`: Number of trials to be evaluated in a study. SMAC3 uses this argument to determine the number of initial configurations: at most `n_trials * init_design_max_ratio` configurations are used in the initial design. This argument does not have to be exact, but a precise value leads to better performance.
- `seed`: Seed for the random number generator. If `None` is given, a seed is generated randomly.
- `surrogate_model_type`: Which model to use for the probabilistic model. Either `"gp"` (Gaussian process), `"gp_mcmc"` (Gaussian process with MCMC), or `"rf"` (random forest). Defaults to `"rf"` (random forest).
- `acq_func_type`: Which acquisition function to use. Either `"ei"` (expected improvement), `"ei_log"` (expected improvement with log-scaled function), `"pi"` (probability of improvement), or `"lcb"` (lower confidence bound). Defaults to `"ei_log"`.
- `init_design_type`: Which initialization sampler to use. Either `"sobol"` (Sobol sequence), `"lhd"` (Latin hypercube), or `"random"`. Defaults to `"sobol"`.
- `surrogate_model_rf_num_trees`: The number of trees used for random forest. Equivalent to `n_estimators` in `RandomForestRegressor` in sklearn.
- `surrogate_model_rf_ratio_features`: The ratio of features to use when training each tree in the random forest. Equivalent to `max_features` in `RandomForestRegressor` in sklearn.
- `surrogate_model_rf_min_samples_split`: The minimum number of samples required to split an internal node. Equivalent to `min_samples_split` in `RandomForestRegressor` in sklearn.
- `surrogate_model_rf_min_samples_leaf`: The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least `min_samples_leaf` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. Equivalent to `min_samples_leaf` in `RandomForestRegressor` in sklearn.
- `init_design_n_configs`: Number of initial configurations.
- `init_design_n_configs_per_hyperparameter`: Number of initial configurations per hyperparameter. For example, if the configuration space covers five hyperparameters and `n_configs_per_hyperparameter` is set to 10, then 50 initial configurations will be sampled.
- `init_design_max_ratio`: Use at most `n_trials * init_design_max_ratio` configurations in the initial design. Additional configurations are not affected by this parameter.
- `output_directory`: Output directory path; defaults to `"smac3_output"`. The files are saved in `./output_directory/name/seed`.
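
As a sketch of how these arguments fit together (reusing the two-parameter objective from the example below and the default `"rf"` surrogate; the concrete values here are illustrative):

```python
import optuna
import optunahub

SMACSampler = optunahub.load_module("samplers/smac_sampler").SMACSampler


def objective(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    y = trial.suggest_int("y", -10, 10)
    return x**2 + y**2


n_trials = 100
sampler = SMACSampler(
    search_space={
        "x": optuna.distributions.FloatDistribution(-10, 10),
        "y": optuna.distributions.IntDistribution(-10, 10),
    },
    n_trials=n_trials,  # Used by SMAC3 to size the initial design.
    seed=42,
    surrogate_model_type="rf",
    acq_func_type="ei_log",
    init_design_type="sobol",
    output_directory="smac3_output",
)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=n_trials)
print(study.best_trial.params, study.best_trial.value)
```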

## Installation

@@ -41,6 +62,7 @@ sampler = SMACSampler(
"y": optuna.distributions.IntDistribution(-10, 10),
},
n_trials=n_trials,
output_directory="smac3_output",
)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=n_trials)
12 changes: 11 additions & 1 deletion package/samplers/smac_sampler/sampler.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path

from ConfigSpace import Categorical
from ConfigSpace import Configuration
@@ -96,6 +97,10 @@ class SMACSampler(optunahub.samplers.SimpleBaseSampler):
init_design_max_ratio:
Use at most ``n_trials * init_design_max_ratio`` number of configurations in the
initial design. Additional configurations are not affected by this parameter.
output_directory:
Output directory path, defaults to "smac3_output".
The directory in which to save the output.
The files are saved in `./output_directory/name/seed`.
"""

def __init__(
@@ -114,11 +119,16 @@ def __init__(
init_design_n_configs: int | None = None,
init_design_n_configs_per_hyperparameter: int = 10,
init_design_max_ratio: float = 0.25,
output_directory: str = "smac3_output",
) -> None:
super().__init__(search_space)
self._cs, self._hp_scale_value = self._convert_to_config_space_design_space(search_space)
scenario = Scenario(
configspace=self._cs, deterministic=True, n_trials=n_trials, seed=seed or -1
configspace=self._cs,
deterministic=True,
n_trials=n_trials,
seed=seed or -1,
output_directory=Path(output_directory),
)
surrogate_model = self._get_surrogate_model(
scenario,
21 changes: 21 additions & 0 deletions package/visualization/plot_pyribs/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Bryon Tjanaka

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.