From 55554f99b30e26a5e9553f2f3087cc72c622404c Mon Sep 17 00:00:00 2001 From: y0z Date: Tue, 6 Feb 2024 18:35:42 +0900 Subject: [PATCH 1/7] Fetched from https://github.com/optuna/optuna/commit/bc4ee3e0018ccf8e809d2e42102fec67b3b3e838. --- optuna/integration/botorch.py | 986 ++++++++++++++++++++++++++++++++++ optuna_integration/botorch.py | 986 ++++++++++++++++++++++++++++++++++ 2 files changed, 1972 insertions(+) create mode 100644 optuna/integration/botorch.py create mode 100644 optuna_integration/botorch.py diff --git a/optuna/integration/botorch.py b/optuna/integration/botorch.py new file mode 100644 index 00000000..78746348 --- /dev/null +++ b/optuna/integration/botorch.py @@ -0,0 +1,986 @@ +from typing import Any +from typing import Callable +from typing import Dict +from typing import Optional +from typing import Sequence +from typing import Union +import warnings + +import numpy +from packaging import version + +from optuna import logging +from optuna._experimental import experimental_class +from optuna._experimental import experimental_func +from optuna._imports import try_import +from optuna._transform import _SearchSpaceTransform +from optuna.distributions import BaseDistribution +from optuna.samplers import BaseSampler +from optuna.samplers import RandomSampler +from optuna.samplers._base import _CONSTRAINTS_KEY +from optuna.samplers._base import _process_constraints_after_trial +from optuna.search_space import IntersectionSearchSpace +from optuna.study import Study +from optuna.study import StudyDirection +from optuna.trial import FrozenTrial +from optuna.trial import TrialState + + +with try_import() as _imports: + from botorch.acquisition.monte_carlo import qExpectedImprovement + from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement + from botorch.acquisition.multi_objective import monte_carlo + from botorch.acquisition.multi_objective.analytic import ExpectedHypervolumeImprovement + from botorch.acquisition.multi_objective.objective 
import IdentityMCMultiOutputObjective + from botorch.acquisition.objective import ConstrainedMCObjective + from botorch.acquisition.objective import GenericMCObjective + from botorch.models import SingleTaskGP + from botorch.models.transforms.outcome import Standardize + from botorch.optim import optimize_acqf + from botorch.sampling import SobolQMCNormalSampler + import botorch.version + + if version.parse(botorch.version.version) < version.parse("0.8.0"): + from botorch.fit import fit_gpytorch_model as fit_gpytorch_mll + + def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: + return SobolQMCNormalSampler(num_samples) + + else: + from botorch.fit import fit_gpytorch_mll + + def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: + return SobolQMCNormalSampler(torch.Size((num_samples,))) + + from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning + from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization + from botorch.utils.sampling import manual_seed + from botorch.utils.sampling import sample_simplex + from botorch.utils.transforms import normalize + from botorch.utils.transforms import unnormalize + from gpytorch.mlls import ExactMarginalLogLikelihood + import torch + + +_logger = logging.get_logger(__name__) + +with try_import() as _imports_logei: + from botorch.acquisition.analytic import LogConstrainedExpectedImprovement + from botorch.acquisition.analytic import LogExpectedImprovement + + +@experimental_func("3.3.0") +def logei_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Log Expected Improvement (LogEI). + + The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` + with single-objective optimization. + + Args: + train_x: + Previous parameter configurations. 
A ``torch.Tensor`` of shape + ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials + and ``n_params`` is the number of parameters. ``n_params`` may be larger than the + actual number of parameters if categorical parameters are included in the search + space, since these parameters are one-hot encoded. + Values are not normalized. + train_obj: + Previously observed objectives. A ``torch.Tensor`` of shape + ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``. + ``n_objectives`` is the number of objectives. Observations are not normalized. + train_con: + Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``. + ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of + constraints. A constraint is violated if strictly larger than 0. If no constraints are + involved in the optimization, this argument will be :obj:`None`. + bounds: + Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is + identical to that of ``train_x``. The first and the second rows correspond to the + lower and upper bounds for each parameter respectively. + pending_x: + Pending parameter configurations. A ``torch.Tensor`` of shape + ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already + suggested all their parameters but have not completed their evaluation, and + ``n_params`` is identical to that of ``train_x``. + + Returns: + Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``. + + """ + + # We need botorch >=0.8.1 for LogExpectedImprovement. + if not _imports_logei.is_successful(): + raise ImportError( + "logei_candidates_func requires botorch >=0.8.1. " + "Please upgrade botorch or use qei_candidates_func as candidates_func instead." 
+ ) + + if train_obj.size(-1) != 1: + raise ValueError("Objective may only contain single values with logEI.") + n_constraints = train_con.size(1) if train_con is not None else 0 + if n_constraints > 0: + assert train_con is not None + train_y = torch.cat([train_obj, train_con], dim=-1) + + is_feas = (train_con <= 0).all(dim=-1) + train_obj_feas = train_obj[is_feas] + + if train_obj_feas.numel() == 0: + _logger.warning( + "No objective values are feasible. Using 0 as the best objective in logEI." + ) + best_f = train_obj.min() + else: + best_f = train_obj_feas.max() + + else: + train_y = train_obj + best_f = train_obj.max() + + train_x = normalize(train_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + if n_constraints > 0: + acqf = LogConstrainedExpectedImprovement( + model=model, + best_f=best_f, + objective_index=0, + constraints={i: (None, 0.0) for i in range(1, n_constraints + 1)}, + ) + else: + acqf = LogExpectedImprovement( + model=model, + best_f=best_f, + ) + + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=10, + raw_samples=512, + options={"batch_limit": 5, "maxiter": 200}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("2.4.0") +def qei_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Quasi MC-based batch Expected Improvement (qEI). + + Args: + train_x: + Previous parameter configurations. A ``torch.Tensor`` of shape + ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials + and ``n_params`` is the number of parameters. 
``n_params`` may be larger than the + actual number of parameters if categorical parameters are included in the search + space, since these parameters are one-hot encoded. + Values are not normalized. + train_obj: + Previously observed objectives. A ``torch.Tensor`` of shape + ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``. + ``n_objectives`` is the number of objectives. Observations are not normalized. + train_con: + Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``. + ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of + constraints. A constraint is violated if strictly larger than 0. If no constraints are + involved in the optimization, this argument will be :obj:`None`. + bounds: + Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is + identical to that of ``train_x``. The first and the second rows correspond to the + lower and upper bounds for each parameter respectively. + pending_x: + Pending parameter configurations. A ``torch.Tensor`` of shape + ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already + suggested all their parameters but have not completed their evaluation, and + ``n_params`` is identical to that of ``train_x``. + Returns: + Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``. + + """ + + if train_obj.size(-1) != 1: + raise ValueError("Objective may only contain single values with qEI.") + if train_con is not None: + train_y = torch.cat([train_obj, train_con], dim=-1) + + is_feas = (train_con <= 0).all(dim=-1) + train_obj_feas = train_obj[is_feas] + + if train_obj_feas.numel() == 0: + # TODO(hvy): Do not use 0 as the best observation. + _logger.warning( + "No objective values are feasible. Using 0 as the best objective in qEI." 
+ ) + best_f = torch.zeros(()) + else: + best_f = train_obj_feas.max() + + n_constraints = train_con.size(1) + objective = ConstrainedMCObjective( + objective=lambda Z: Z[..., 0], + constraints=[ + (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) + ], + ) + else: + train_y = train_obj + + best_f = train_obj.max() + + objective = None # Using the default identity objective. + + train_x = normalize(train_x, bounds=bounds) + if pending_x is not None: + pending_x = normalize(pending_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + acqf = qExpectedImprovement( + model=model, + best_f=best_f, + sampler=_get_sobol_qmc_normal_sampler(256), + objective=objective, + X_pending=pending_x, + ) + + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=10, + raw_samples=512, + options={"batch_limit": 5, "maxiter": 200}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("3.3.0") +def qnei_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Quasi MC-based batch Noisy Expected Improvement (qNEI). + + This function may perform better than qEI (`qei_candidates_func`) when + the evaluated values of objective function are noisy. + + .. seealso:: + :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value + descriptions. 
+ """ + if train_obj.size(-1) != 1: + raise ValueError("Objective may only contain single values with qNEI.") + if train_con is not None: + train_y = torch.cat([train_obj, train_con], dim=-1) + + n_constraints = train_con.size(1) + objective = ConstrainedMCObjective( + objective=lambda Z: Z[..., 0], + constraints=[ + (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) + ], + ) + else: + train_y = train_obj + + objective = None # Using the default identity objective. + + train_x = normalize(train_x, bounds=bounds) + if pending_x is not None: + pending_x = normalize(pending_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + acqf = qNoisyExpectedImprovement( + model=model, + X_baseline=train_x, + sampler=_get_sobol_qmc_normal_sampler(256), + objective=objective, + X_pending=pending_x, + ) + + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=10, + raw_samples=512, + options={"batch_limit": 5, "maxiter": 200}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("2.4.0") +def qehvi_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI). + + The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` + with multi-objective optimization when the number of objectives is three or less. + + .. seealso:: + :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value + descriptions. 
+ """ + + n_objectives = train_obj.size(-1) + + if train_con is not None: + train_y = torch.cat([train_obj, train_con], dim=-1) + + is_feas = (train_con <= 0).all(dim=-1) + train_obj_feas = train_obj[is_feas] + + n_constraints = train_con.size(1) + additional_qehvi_kwargs = { + "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))), + "constraints": [ + (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) + ], + } + else: + train_y = train_obj + + train_obj_feas = train_obj + + additional_qehvi_kwargs = {} + + train_x = normalize(train_x, bounds=bounds) + if pending_x is not None: + pending_x = normalize(pending_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + # Approximate box decomposition similar to Ax when the number of objectives is large. + # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 + if n_objectives > 4: + alpha = 10 ** (-8 + n_objectives) + else: + alpha = 0.0 + + ref_point = train_obj.min(dim=0).values - 1e-8 + + partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_obj_feas, alpha=alpha) + + ref_point_list = ref_point.tolist() + + acqf = monte_carlo.qExpectedHypervolumeImprovement( + model=model, + ref_point=ref_point_list, + partitioning=partitioning, + sampler=_get_sobol_qmc_normal_sampler(256), + X_pending=pending_x, + **additional_qehvi_kwargs, + ) + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=20, + raw_samples=1024, + options={"batch_limit": 5, "maxiter": 200, "nonnegative": True}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("3.5.0") 
+def ehvi_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Expected Hypervolume Improvement (EHVI). + + The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` + with multi-objective optimization without constraints. + + .. seealso:: + :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value + descriptions. + """ + + n_objectives = train_obj.size(-1) + if train_con is not None: + raise ValueError("Constraints are not supported with ehvi_candidates_func.") + + train_y = train_obj + train_x = normalize(train_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + # Approximate box decomposition similar to Ax when the number of objectives is large. 
+ # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 + if n_objectives > 4: + alpha = 10 ** (-8 + n_objectives) + else: + alpha = 0.0 + + ref_point = train_obj.min(dim=0).values - 1e-8 + + partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_y, alpha=alpha) + + ref_point_list = ref_point.tolist() + + acqf = ExpectedHypervolumeImprovement( + model=model, + ref_point=ref_point_list, + partitioning=partitioning, + ) + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=20, + raw_samples=1024, + options={"batch_limit": 5, "maxiter": 200}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("3.1.0") +def qnehvi_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Quasi MC-based batch Noisy Expected Hypervolume Improvement (qNEHVI). + + According to Botorch/Ax documentation, + this function may perform better than qEHVI (`qehvi_candidates_func`). + (cf. https://botorch.org/tutorials/constrained_multi_objective_bo ) + + .. seealso:: + :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value + descriptions. 
+ """ + + n_objectives = train_obj.size(-1) + + if train_con is not None: + train_y = torch.cat([train_obj, train_con], dim=-1) + + n_constraints = train_con.size(1) + additional_qnehvi_kwargs = { + "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))), + "constraints": [ + (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) + ], + } + else: + train_y = train_obj + + additional_qnehvi_kwargs = {} + + train_x = normalize(train_x, bounds=bounds) + if pending_x is not None: + pending_x = normalize(pending_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + # Approximate box decomposition similar to Ax when the number of objectives is large. + # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 + if n_objectives > 4: + alpha = 10 ** (-8 + n_objectives) + else: + alpha = 0.0 + + ref_point = train_obj.min(dim=0).values - 1e-8 + + ref_point_list = ref_point.tolist() + + # prune_baseline=True is generally recommended by the documentation of BoTorch. + # cf. 
https://botorch.org/api/acquisition.html (accessed on 2022/11/18) + acqf = monte_carlo.qNoisyExpectedHypervolumeImprovement( + model=model, + ref_point=ref_point_list, + X_baseline=train_x, + alpha=alpha, + prune_baseline=True, + sampler=_get_sobol_qmc_normal_sampler(256), + X_pending=pending_x, + **additional_qnehvi_kwargs, + ) + + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=20, + raw_samples=1024, + options={"batch_limit": 5, "maxiter": 200, "nonnegative": True}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +@experimental_func("2.4.0") +def qparego_candidates_func( + train_x: "torch.Tensor", + train_obj: "torch.Tensor", + train_con: Optional["torch.Tensor"], + bounds: "torch.Tensor", + pending_x: Optional["torch.Tensor"], +) -> "torch.Tensor": + """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization. + + The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` + with multi-objective optimization when the number of objectives is larger than three. + + .. seealso:: + :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value + descriptions. 
+ """ + + n_objectives = train_obj.size(-1) + + weights = sample_simplex(n_objectives).squeeze() + scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj) + + if train_con is not None: + train_y = torch.cat([train_obj, train_con], dim=-1) + n_constraints = train_con.size(1) + objective = ConstrainedMCObjective( + objective=lambda Z: scalarization(Z[..., :n_objectives]), + constraints=[ + (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) + ], + ) + else: + train_y = train_obj + + objective = GenericMCObjective(scalarization) + + train_x = normalize(train_x, bounds=bounds) + if pending_x is not None: + pending_x = normalize(pending_x, bounds=bounds) + + model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + fit_gpytorch_mll(mll) + + acqf = qExpectedImprovement( + model=model, + best_f=objective(train_y).max(), + sampler=_get_sobol_qmc_normal_sampler(256), + objective=objective, + X_pending=pending_x, + ) + + standard_bounds = torch.zeros_like(bounds) + standard_bounds[1] = 1 + + candidates, _ = optimize_acqf( + acq_function=acqf, + bounds=standard_bounds, + q=1, + num_restarts=20, + raw_samples=1024, + options={"batch_limit": 5, "maxiter": 200}, + sequential=True, + ) + + candidates = unnormalize(candidates.detach(), bounds=bounds) + + return candidates + + +def _get_default_candidates_func( + n_objectives: int, + has_constraint: bool, + consider_running_trials: bool, +) -> Callable[ + [ + "torch.Tensor", + "torch.Tensor", + Optional["torch.Tensor"], + "torch.Tensor", + Optional["torch.Tensor"], + ], + "torch.Tensor", +]: + if n_objectives > 3 and not has_constraint and not consider_running_trials: + return ehvi_candidates_func + elif n_objectives > 3: + return qparego_candidates_func + elif n_objectives > 1: + return qehvi_candidates_func + elif consider_running_trials: + return qei_candidates_func + else: + return 
logei_candidates_func + + +@experimental_class("2.4.0") +class BoTorchSampler(BaseSampler): + """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch. + + This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter + configurations. Parameters are transformed to continuous space and passed to BoTorch, and then + transformed back to Optuna's representations. Categorical parameters are one-hot encoded. + + .. seealso:: + See an `example <https://github.com/optuna/optuna-examples/blob/main/multi_objective/ + botorch_simple.py>`_ of how to use the sampler. + + .. seealso:: + See the `BoTorch <https://botorch.org/>`_ homepage for details and for how to implement + your own ``candidates_func``. + + .. note:: + An instance of this sampler *should not be used with different studies* when used with + constraints. Instead, a new instance should be created for each new study. The reason for + this is that the sampler is stateful, keeping all the computed constraints. + + Args: + candidates_func: + An optional function that suggests the next candidates. It must take the training + data, the objectives, the constraints, the search space bounds and return the next + candidates. The arguments are of type ``torch.Tensor``. The return value must be a + ``torch.Tensor``. However, if ``constraints_func`` is omitted, constraints will be + :obj:`None`. For any constraints that failed to compute, the tensor will contain + NaN. + + If omitted, it is determined automatically based on the number of objectives, + whether a constraint is specified and ``consider_running_trials``. If the + number of objectives is one, log-Expected Improvement is used, with or without + constraints, unless ``consider_running_trials`` is :obj:`True`, in which case + quasi MC-based batch Expected Improvement (qEI) is used. + If the number of objectives is either two or three, Quasi MC-based + batch Expected Hypervolume Improvement (qEHVI) is used.
Otherwise, for a larger number + of objectives, analytic Expected Hypervolume Improvement is used if no constraints + are specified and ``consider_running_trials`` is :obj:`False`; otherwise the + faster Quasi MC-based extended ParEGO (qParEGO) is used. + + The function should assume *maximization* of the objective. + + .. seealso:: + See :func:`optuna.integration.botorch.qei_candidates_func` for an example. + constraints_func: + An optional function that computes the objective constraints. It must take a + :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must + be a sequence of :obj:`float` s. A value strictly larger than 0 means that a + constraint is violated. A value equal to or smaller than 0 is considered feasible. + + If omitted, no constraints will be passed to ``candidates_func`` nor taken into + account during suggestion. + n_startup_trials: + Number of initial trials, that is the number of trials to resort to independent + sampling. + consider_running_trials: + If True, the acquisition function takes into consideration the running parameters + whose evaluation has not completed. Enabling this option is expected to improve the + performance of parallel optimization. + + .. note:: + Added in v3.2.0 as an experimental argument. + independent_sampler: + An independent sampler to use for the initial trials and for parameters that are + conditional. + seed: + Seed for random number generator. + device: + A ``torch.device`` to store input and output data of BoTorch. Set a CUDA device + to speed up sampling.
+ """ + + def __init__( + self, + *, + candidates_func: Optional[ + Callable[ + [ + "torch.Tensor", + "torch.Tensor", + Optional["torch.Tensor"], + "torch.Tensor", + Optional["torch.Tensor"], + ], + "torch.Tensor", + ] + ] = None, + constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None, + n_startup_trials: int = 10, + consider_running_trials: bool = False, + independent_sampler: Optional[BaseSampler] = None, + seed: Optional[int] = None, + device: Optional["torch.device"] = None, + ): + _imports.check() + + self._candidates_func = candidates_func + self._constraints_func = constraints_func + self._consider_running_trials = consider_running_trials + self._independent_sampler = independent_sampler or RandomSampler(seed=seed) + self._n_startup_trials = n_startup_trials + self._seed = seed + + self._study_id: Optional[int] = None + self._search_space = IntersectionSearchSpace() + self._device = device or torch.device("cpu") + + def infer_relative_search_space( + self, + study: Study, + trial: FrozenTrial, + ) -> Dict[str, BaseDistribution]: + if self._study_id is None: + self._study_id = study._study_id + if self._study_id != study._study_id: + # Note that the check below is meaningless when `InMemoryStorage` is used + # because `InMemoryStorage.create_new_study` always returns the same study ID. + raise RuntimeError("BoTorchSampler cannot handle multiple studies.") + + search_space: Dict[str, BaseDistribution] = {} + for name, distribution in self._search_space.calculate(study).items(): + if distribution.single(): + # built-in `candidates_func` cannot handle distributions that contain just a + # single value, so we skip them. Note that the parameter values for such + # distributions are sampled in `Trial`. 
+ continue + search_space[name] = distribution + + return search_space + + def sample_relative( + self, + study: Study, + trial: FrozenTrial, + search_space: Dict[str, BaseDistribution], + ) -> Dict[str, Any]: + assert isinstance(search_space, dict) + + if len(search_space) == 0: + return {} + + completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)) + running_trials = [ + t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial + ] + trials = completed_trials + running_trials + + n_trials = len(trials) + n_completed_trials = len(completed_trials) + if n_trials < self._n_startup_trials: + return {} + + trans = _SearchSpaceTransform(search_space) + n_objectives = len(study.directions) + values: Union[numpy.ndarray, torch.Tensor] = numpy.empty( + (n_trials, n_objectives), dtype=numpy.float64 + ) + params: Union[numpy.ndarray, torch.Tensor] + con: Optional[Union[numpy.ndarray, torch.Tensor]] = None + bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds + params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64) + for trial_idx, trial in enumerate(trials): + if trial.state == TrialState.COMPLETE: + params[trial_idx] = trans.transform(trial.params) + assert len(study.directions) == len(trial.values) + for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)): + assert value is not None + if ( + direction == StudyDirection.MINIMIZE + ): # BoTorch always assumes maximization. 
+ value *= -1 + values[trial_idx, obj_idx] = value + if self._constraints_func is not None: + constraints = study._storage.get_trial_system_attrs(trial._trial_id).get( + _CONSTRAINTS_KEY + ) + if constraints is not None: + n_constraints = len(constraints) + + if con is None: + con = numpy.full( + (n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64 + ) + elif n_constraints != con.shape[1]: + raise RuntimeError( + f"Expected {con.shape[1]} constraints " + f"but received {n_constraints}." + ) + con[trial_idx] = constraints + elif trial.state == TrialState.RUNNING: + if all(p in trial.params for p in search_space): + params[trial_idx] = trans.transform(trial.params) + else: + params[trial_idx] = numpy.nan + else: + assert False, "trail.state must be TrialState.COMPLETE or TrialState.RUNNING." + + if self._constraints_func is not None: + if con is None: + warnings.warn( + "`constraints_func` was given but no call to it correctly computed " + "constraints. Constraints passed to `candidates_func` will be `None`." + ) + elif numpy.isnan(con).any(): + warnings.warn( + "`constraints_func` was given but some calls to it did not correctly compute " + "constraints. Constraints passed to `candidates_func` will contain NaN." 
+ ) + + values = torch.from_numpy(values).to(self._device) + params = torch.from_numpy(params).to(self._device) + if con is not None: + con = torch.from_numpy(con).to(self._device) + bounds = torch.from_numpy(bounds).to(self._device) + + if con is not None: + if con.dim() == 1: + con.unsqueeze_(-1) + bounds.transpose_(0, 1) + + if self._candidates_func is None: + self._candidates_func = _get_default_candidates_func( + n_objectives=n_objectives, + has_constraint=con is not None, + consider_running_trials=self._consider_running_trials, + ) + + completed_values = values[:n_completed_trials] + completed_params = params[:n_completed_trials] + if self._consider_running_trials: + running_params = params[n_completed_trials:] + running_params = running_params[~torch.isnan(running_params).any(dim=1)] + else: + running_params = None + + with manual_seed(self._seed): + # `manual_seed` makes the default candidates functions reproducible. + # `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is + # deterministic when the BoTorch's seed is fixed. + candidates = self._candidates_func( + completed_params, completed_values, con, bounds, running_params + ) + if self._seed is not None: + self._seed += 1 + + if not isinstance(candidates, torch.Tensor): + raise TypeError("Candidates must be a torch.Tensor.") + if candidates.dim() == 2: + if candidates.size(0) != 1: + raise ValueError( + "Candidates batch optimization is not supported and the first dimension must " + "have size 1 if candidates is a two-dimensional tensor. Actual: " + f"{candidates.size()}." + ) + # Batch size is one. Get rid of the batch dimension. + candidates = candidates.squeeze(0) + if candidates.dim() != 1: + raise ValueError("Candidates must be one or two-dimensional.") + if candidates.size(0) != bounds.size(1): + raise ValueError( + "Candidates size must match with the given bounds. Actual candidates: " + f"{candidates.size(0)}, bounds: {bounds.size(1)}." 
+ ) + + return trans.untransform(candidates.cpu().numpy()) + + def sample_independent( + self, + study: Study, + trial: FrozenTrial, + param_name: str, + param_distribution: BaseDistribution, + ) -> Any: + return self._independent_sampler.sample_independent( + study, trial, param_name, param_distribution + ) + + def reseed_rng(self) -> None: + self._independent_sampler.reseed_rng() + if self._seed is not None: + self._seed = numpy.random.RandomState().randint(numpy.iinfo(numpy.int32).max) + + def before_trial(self, study: Study, trial: FrozenTrial) -> None: + self._independent_sampler.before_trial(study, trial) + + def after_trial( + self, + study: Study, + trial: FrozenTrial, + state: TrialState, + values: Optional[Sequence[float]], + ) -> None: + if self._constraints_func is not None: + _process_constraints_after_trial(self._constraints_func, study, trial, state) + self._independent_sampler.after_trial(study, trial, state, values) diff --git a/optuna_integration/botorch.py b/optuna_integration/botorch.py new file mode 100644 index 00000000..78746348 --- /dev/null +++ b/optuna_integration/botorch.py @@ -0,0 +1,986 @@ +from typing import Any +from typing import Callable +from typing import Dict +from typing import Optional +from typing import Sequence +from typing import Union +import warnings + +import numpy +from packaging import version + +from optuna import logging +from optuna._experimental import experimental_class +from optuna._experimental import experimental_func +from optuna._imports import try_import +from optuna._transform import _SearchSpaceTransform +from optuna.distributions import BaseDistribution +from optuna.samplers import BaseSampler +from optuna.samplers import RandomSampler +from optuna.samplers._base import _CONSTRAINTS_KEY +from optuna.samplers._base import _process_constraints_after_trial +from optuna.search_space import IntersectionSearchSpace +from optuna.study import Study +from optuna.study import StudyDirection +from optuna.trial 
import FrozenTrial +from optuna.trial import TrialState + + +with try_import() as _imports: + from botorch.acquisition.monte_carlo import qExpectedImprovement + from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement + from botorch.acquisition.multi_objective import monte_carlo + from botorch.acquisition.multi_objective.analytic import ExpectedHypervolumeImprovement + from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective + from botorch.acquisition.objective import ConstrainedMCObjective + from botorch.acquisition.objective import GenericMCObjective + from botorch.models import SingleTaskGP + from botorch.models.transforms.outcome import Standardize + from botorch.optim import optimize_acqf + from botorch.sampling import SobolQMCNormalSampler + import botorch.version + + if version.parse(botorch.version.version) < version.parse("0.8.0"): + from botorch.fit import fit_gpytorch_model as fit_gpytorch_mll + + def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: + return SobolQMCNormalSampler(num_samples) + + else: + from botorch.fit import fit_gpytorch_mll + + def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: + return SobolQMCNormalSampler(torch.Size((num_samples,))) + + from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning + from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization + from botorch.utils.sampling import manual_seed + from botorch.utils.sampling import sample_simplex + from botorch.utils.transforms import normalize + from botorch.utils.transforms import unnormalize + from gpytorch.mlls import ExactMarginalLogLikelihood + import torch + + +_logger = logging.get_logger(__name__) + +with try_import() as _imports_logei: + from botorch.acquisition.analytic import LogConstrainedExpectedImprovement + from botorch.acquisition.analytic import LogExpectedImprovement + + +@experimental_func("3.3.0") +def 
@experimental_func("3.3.0")
def logei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Log Expected Improvement (LogEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.
        pending_x:
            Pending parameter configurations. A ``torch.Tensor`` of shape
            ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already
            suggested all their parameters but have not completed their evaluation, and
            ``n_params`` is identical to that of ``train_x``. This function accepts the argument
            for signature compatibility but does not use it.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    Raises:
        ImportError:
            If the LogEI acquisition functions could not be imported from BoTorch.
        ValueError:
            If ``train_obj`` contains more than one objective.

    """

    # We need botorch >=0.8.1 for LogExpectedImprovement.
    if not _imports_logei.is_successful():
        raise ImportError(
            "logei_candidates_func requires botorch >=0.8.1. "
            "Please upgrade botorch or use qei_candidates_func as candidates_func instead."
        )

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with logEI.")
    n_constraints = train_con.size(1) if train_con is not None else 0
    if n_constraints > 0:
        assert train_con is not None
        # The GP models the objective and every constraint jointly.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        # A trial is feasible only when all of its constraints are satisfied (<= 0).
        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # The fallback is the smallest observed objective, so the message says so
            # (it previously claimed that 0 was used).
            _logger.warning(
                "No objective values are feasible. "
                "Using the minimum observed objective as the best objective in logEI."
            )
            best_f = train_obj.min()
        else:
            best_f = train_obj_feas.max()

    else:
        train_y = train_obj
        best_f = train_obj.max()

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_mll(mll)
    if n_constraints > 0:
        acqf = LogConstrainedExpectedImprovement(
            model=model,
            best_f=best_f,
            objective_index=0,
            # Model outputs 1..n_constraints are the constraints; each is bounded above by 0.
            constraints={i: (None, 0.0) for i in range(1, n_constraints + 1)},
        )
    else:
        acqf = LogExpectedImprovement(
            model=model,
            best_f=best_f,
        )

    # The acquisition function is optimized over the unit cube because train_x was normalized.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # Map the candidate back to the original (unnormalized) search space.
    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
@experimental_func("2.4.0")
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.
        pending_x:
            Pending parameter configurations. A ``torch.Tensor`` of shape
            ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already
            suggested all their parameters but have not completed their evaluation, and
            ``n_params`` is identical to that of ``train_x``.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    Raises:
        ValueError:
            If ``train_obj`` contains more than one objective.

    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        # The GP models the objective and every constraint jointly.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        # A trial is feasible only when all of its constraints are satisfied (<= 0).
        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            # Create the fallback with the dtype and device of the training data so that it
            # does not end up on the CPU while the model lives on another device.
            best_f = torch.zeros((), dtype=train_obj.dtype, device=train_obj.device)
        else:
            best_f = train_obj_feas.max()

        n_constraints = train_con.size(1)
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            # The constraints occupy the last n_constraints model outputs.
            constraints=[
                (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints)
            ],
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    train_x = normalize(train_x, bounds=bounds)
    if pending_x is not None:
        pending_x = normalize(pending_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_mll(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=_get_sobol_qmc_normal_sampler(256),
        objective=objective,
        X_pending=pending_x,
    )

    # The acquisition function is optimized over the unit cube because train_x was normalized.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # Map the candidate back to the original (unnormalized) search space.
    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
@experimental_func("3.3.0")
def qnei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Quasi MC-based batch Noisy Expected Improvement (qNEI).

    This function may perform better than qEI (`qei_candidates_func`) when
    the evaluated values of objective function are noisy.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """
    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qNEI.")

    if train_con is None:
        targets = train_obj
        mc_objective = None  # Using the default identity objective.
    else:
        # Objective and constraints are modelled jointly; the constraints occupy the
        # trailing columns of the model output.
        targets = torch.cat([train_obj, train_con], dim=-1)
        n_con = train_con.size(1)
        mc_objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=[(lambda Z, i=i: Z[..., -n_con + i]) for i in range(n_con)],
        )

    x_unit = normalize(train_x, bounds=bounds)
    pending_unit = None if pending_x is None else normalize(pending_x, bounds=bounds)

    gp = SingleTaskGP(x_unit, targets, outcome_transform=Standardize(m=targets.size(-1)))
    fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))

    acquisition = qNoisyExpectedImprovement(
        model=gp,
        X_baseline=x_unit,
        sampler=_get_sobol_qmc_normal_sampler(256),
        objective=mc_objective,
        X_pending=pending_unit,
    )

    # Search the unit cube, since the training inputs were normalized to it.
    unit_box = torch.zeros_like(bounds)
    unit_box[1] = 1

    best_unit, _ = optimize_acqf(
        acq_function=acquisition,
        bounds=unit_box,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # Return the suggestion in the original parameter scale.
    return unnormalize(best_unit.detach(), bounds=bounds)
@experimental_func("2.4.0")
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is None:
        targets = train_obj
        feasible_obj = train_obj
        extra_kwargs = {}
    else:
        # Objectives and constraints are modelled jointly; the constraints occupy the
        # trailing columns of the model output.
        targets = torch.cat([train_obj, train_con], dim=-1)

        # Only trials satisfying every constraint (<= 0) feed the partitioning.
        feasible_mask = (train_con <= 0).all(dim=-1)
        feasible_obj = train_obj[feasible_mask]

        n_con = train_con.size(1)
        extra_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints": [(lambda Z, i=i: Z[..., -n_con + i]) for i in range(n_con)],
        }

    x_unit = normalize(train_x, bounds=bounds)
    pending_unit = None if pending_x is None else normalize(pending_x, bounds=bounds)

    gp = SingleTaskGP(x_unit, targets, outcome_transform=Standardize(m=targets.shape[-1]))
    fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63
    alpha = 10 ** (-8 + n_objectives) if n_objectives > 4 else 0.0

    # The reference point sits just below the worst observed value of every objective.
    ref_point = train_obj.min(dim=0).values - 1e-8
    partitioning = NondominatedPartitioning(ref_point=ref_point, Y=feasible_obj, alpha=alpha)

    acquisition = monte_carlo.qExpectedHypervolumeImprovement(
        model=gp,
        ref_point=ref_point.tolist(),
        partitioning=partitioning,
        sampler=_get_sobol_qmc_normal_sampler(256),
        X_pending=pending_unit,
        **extra_kwargs,
    )

    # Search the unit cube, since the training inputs were normalized to it.
    unit_box = torch.zeros_like(bounds)
    unit_box[1] = 1

    best_unit, _ = optimize_acqf(
        acq_function=acquisition,
        bounds=unit_box,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    # Return the suggestion in the original parameter scale.
    return unnormalize(best_unit.detach(), bounds=bounds)
@experimental_func("3.5.0")
def ehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Expected Hypervolume Improvement (EHVI).

    Analytic variant of the hypervolume-improvement acquisition function.
    :class:`~optuna.integration.BoTorchSampler` picks it by default for more than three
    objectives when there are no constraints and running trials are not considered.
    Constraints are rejected, and ``pending_x`` is accepted but not used.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)
    if train_con is not None:
        raise ValueError("Constraints are not supported with ehvi_candidates_func.")

    targets = train_obj
    x_unit = normalize(train_x, bounds=bounds)

    gp = SingleTaskGP(x_unit, targets, outcome_transform=Standardize(m=targets.size(-1)))
    fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63
    alpha = 10 ** (-8 + n_objectives) if n_objectives > 4 else 0.0

    # The reference point sits just below the worst observed value of every objective.
    ref_point = train_obj.min(dim=0).values - 1e-8
    partitioning = NondominatedPartitioning(ref_point=ref_point, Y=targets, alpha=alpha)

    acquisition = ExpectedHypervolumeImprovement(
        model=gp,
        ref_point=ref_point.tolist(),
        partitioning=partitioning,
    )

    # Search the unit cube, since the training inputs were normalized to it.
    unit_box = torch.zeros_like(bounds)
    unit_box[1] = 1

    best_unit, _ = optimize_acqf(
        acq_function=acquisition,
        bounds=unit_box,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # Return the suggestion in the original parameter scale.
    return unnormalize(best_unit.detach(), bounds=bounds)
@experimental_func("3.1.0")
def qnehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Quasi MC-based batch Noisy Expected Hypervolume Improvement (qNEHVI).

    According to Botorch/Ax documentation,
    this function may perform better than qEHVI (`qehvi_candidates_func`).
    (cf. https://botorch.org/tutorials/constrained_multi_objective_bo )

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is None:
        targets = train_obj
        extra_kwargs = {}
    else:
        # Objectives and constraints are modelled jointly; the constraints occupy the
        # trailing columns of the model output.
        targets = torch.cat([train_obj, train_con], dim=-1)

        n_con = train_con.size(1)
        extra_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints": [(lambda Z, i=i: Z[..., -n_con + i]) for i in range(n_con)],
        }

    x_unit = normalize(train_x, bounds=bounds)
    pending_unit = None if pending_x is None else normalize(pending_x, bounds=bounds)

    gp = SingleTaskGP(x_unit, targets, outcome_transform=Standardize(m=targets.shape[-1]))
    fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63
    alpha = 10 ** (-8 + n_objectives) if n_objectives > 4 else 0.0

    # The reference point sits just below the worst observed value of every objective.
    ref_point = train_obj.min(dim=0).values - 1e-8

    # prune_baseline=True is generally recommended by the documentation of BoTorch.
    # cf. https://botorch.org/api/acquisition.html (accessed on 2022/11/18)
    acquisition = monte_carlo.qNoisyExpectedHypervolumeImprovement(
        model=gp,
        ref_point=ref_point.tolist(),
        X_baseline=x_unit,
        alpha=alpha,
        prune_baseline=True,
        sampler=_get_sobol_qmc_normal_sampler(256),
        X_pending=pending_unit,
        **extra_kwargs,
    )

    # Search the unit cube, since the training inputs were normalized to it.
    unit_box = torch.zeros_like(bounds)
    unit_box[1] = 1

    best_unit, _ = optimize_acqf(
        acq_function=acquisition,
        bounds=unit_box,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    # Return the suggestion in the original parameter scale.
    return unnormalize(best_unit.detach(), bounds=bounds)
@experimental_func("2.4.0")
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
    pending_x: Optional["torch.Tensor"],
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    # Draw the random scalarization weights on the device of the training data so that the
    # scalarization can be applied to tensors living on a non-default device.
    weights = sample_simplex(n_objectives, device=train_obj.device).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        # Objectives and constraints are modelled jointly; the objectives occupy the leading
        # columns and the constraints the trailing ones.
        train_y = torch.cat([train_obj, train_con], dim=-1)
        n_constraints = train_con.size(1)
        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=[
                (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints)
            ],
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)
    if pending_x is not None:
        pending_x = normalize(pending_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_mll(mll)

    acqf = qExpectedImprovement(
        model=model,
        # The incumbent is the best scalarized value among the observed trials.
        best_f=objective(train_y).max(),
        sampler=_get_sobol_qmc_normal_sampler(256),
        objective=objective,
        X_pending=pending_x,
    )

    # The acquisition function is optimized over the unit cube because train_x was normalized.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # Map the candidate back to the original (unnormalized) search space.
    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates


def _get_default_candidates_func(
    n_objectives: int,
    has_constraint: bool,
    consider_running_trials: bool,
) -> Callable[
    [
        "torch.Tensor",
        "torch.Tensor",
        Optional["torch.Tensor"],
        "torch.Tensor",
        Optional["torch.Tensor"],
    ],
    "torch.Tensor",
]:
    """Select the built-in candidates function for a problem setting.

    Args:
        n_objectives:
            Number of objectives of the study.
        has_constraint:
            Whether constraint values are passed to the candidates function.
        consider_running_trials:
            Whether parameters of running trials are passed as pending points.

    Returns:
        For more than three objectives, ``ehvi_candidates_func`` when there are no constraints
        and no pending points, otherwise ``qparego_candidates_func``. For two or three
        objectives, ``qehvi_candidates_func``. For a single objective,
        ``qei_candidates_func`` when running trials are considered, otherwise
        ``logei_candidates_func``.
    """
    if n_objectives > 3 and not has_constraint and not consider_running_trials:
        return ehvi_candidates_func
    elif n_objectives > 3:
        return qparego_candidates_func
    elif n_objectives > 1:
        return qehvi_candidates_func
    elif consider_running_trials:
        return qei_candidates_func
    else:
        return logei_candidates_func
@experimental_class("2.4.0")
class BoTorchSampler(BaseSampler):
    """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch.

    This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter
    configurations. Parameters are transformed to continuous space and passed to BoTorch, and then
    transformed back to Optuna's representations. Categorical parameters are one-hot encoded.

    .. seealso::
        See the example at
        https://github.com/optuna/optuna-examples/blob/main/multi_objective/botorch_simple.py
        for how to use the sampler.

    .. seealso::
        See the BoTorch homepage (https://botorch.org/) for details and for how to implement
        your own ``candidates_func``.

    .. note::
        An instance of this sampler *should not be used with different studies* when used with
        constraints. Instead, a new instance should be created for each new study. The reason for
        this is that the sampler is stateful keeping all the computed constraints.

    Args:
        candidates_func:
            An optional function that suggests the next candidates. It must take the training
            data, the objectives, the constraints, the search space bounds and the pending
            (running) parameter configurations, and return the next candidates. The arguments
            are of type ``torch.Tensor``. The return value must be a ``torch.Tensor``. However,
            if ``constraints_func`` is omitted, constraints will be :obj:`None`. For any
            constraints that failed to compute, the tensor will contain NaN. The pending
            configurations are :obj:`None` unless ``consider_running_trials`` is enabled.

            If omitted, it is determined automatically when the first relative sampling takes
            place, based on the number of objectives, whether constraints are available and
            ``consider_running_trials``:

            - One objective: log-Expected Improvement (LogEI), or quasi MC-based batch Expected
              Improvement (qEI) if ``consider_running_trials`` is enabled.
            - Two or three objectives: quasi MC-based batch Expected Hypervolume Improvement
              (qEHVI).
            - More than three objectives: analytic Expected Hypervolume Improvement (EHVI) if
              there are no constraints and ``consider_running_trials`` is disabled, otherwise
              the faster quasi MC-based extended ParEGO (qParEGO).

            The function should assume *maximization* of the objective.

            .. seealso::
                See :func:`optuna.integration.botorch.qei_candidates_func` for an example.
        constraints_func:
            An optional function that computes the objective constraints. It must take a
            :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must
            be a sequence of :obj:`float` s. A value strictly larger than 0 means that a
            constraint is violated. A value equal to or smaller than 0 is considered feasible.

            If omitted, no constraints will be passed to ``candidates_func`` nor taken into
            account during suggestion.
        n_startup_trials:
            Number of initial trials, that is the number of trials to resort to independent
            sampling.
        consider_running_trials:
            If True, the acquisition function takes into consideration the running parameters
            whose evaluation has not completed. Enabling this option is considered to improve the
            performance of parallel optimization.

            .. note::
                Added in v3.2.0 as an experimental argument.
        independent_sampler:
            An independent sampler to use for the initial trials and for parameters that are
            conditional.
        seed:
            Seed for random number generator.
        device:
            A ``torch.device`` to store input and output data of BoTorch. Set a CUDA device
            if you want to speed up sampling.
    """

    def __init__(
        self,
        *,
        candidates_func: Optional[
            Callable[
                [
                    "torch.Tensor",
                    "torch.Tensor",
                    Optional["torch.Tensor"],
                    "torch.Tensor",
                    Optional["torch.Tensor"],
                ],
                "torch.Tensor",
            ]
        ] = None,
        constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
        n_startup_trials: int = 10,
        consider_running_trials: bool = False,
        independent_sampler: Optional[BaseSampler] = None,
        seed: Optional[int] = None,
        device: Optional["torch.device"] = None,
    ):
        # Fail early with a helpful message when BoTorch or its dependencies are missing.
        _imports.check()

        self._candidates_func = candidates_func
        self._constraints_func = constraints_func
        self._consider_running_trials = consider_running_trials
        self._independent_sampler = independent_sampler or RandomSampler(seed=seed)
        self._n_startup_trials = n_startup_trials
        self._seed = seed

        self._study_id: Optional[int] = None
        self._search_space = IntersectionSearchSpace()
        self._device = device or torch.device("cpu")

    def infer_relative_search_space(
        self,
        study: Study,
        trial: FrozenTrial,
    ) -> Dict[str, BaseDistribution]:
        """Return the intersection search space without single-valued distributions.

        Raises:
            RuntimeError:
                If the sampler is used with a study other than the first one it has seen.
        """
        if self._study_id is None:
            self._study_id = study._study_id
        if self._study_id != study._study_id:
            # Note that the check below is meaningless when `InMemoryStorage` is used
            # because `InMemoryStorage.create_new_study` always returns the same study ID.
            raise RuntimeError("BoTorchSampler cannot handle multiple studies.")

        search_space: Dict[str, BaseDistribution] = {}
        for name, distribution in self._search_space.calculate(study).items():
            if distribution.single():
                # built-in `candidates_func` cannot handle distributions that contain just a
                # single value, so we skip them. Note that the parameter values for such
                # distributions are sampled in `Trial`.
                continue
            search_space[name] = distribution

        return search_space

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: Dict[str, BaseDistribution],
    ) -> Dict[str, Any]:
        """Suggest parameters for ``search_space`` using ``candidates_func``.

        Returns an empty dict (deferring to independent sampling) when the search space is
        empty or when fewer than ``n_startup_trials`` completed and running trials exist.

        Raises:
            RuntimeError:
                If trials report different numbers of constraints.
            TypeError:
                If ``candidates_func`` does not return a ``torch.Tensor``.
            ValueError:
                If the returned candidates have an unsupported shape.
        """
        assert isinstance(search_space, dict)

        if len(search_space) == 0:
            return {}

        completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
        running_trials = [
            t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial
        ]
        # Completed trials come first so that they can be sliced off by count below.
        trials = completed_trials + running_trials

        n_trials = len(trials)
        n_completed_trials = len(completed_trials)
        if n_trials < self._n_startup_trials:
            return {}

        trans = _SearchSpaceTransform(search_space)
        n_objectives = len(study.directions)
        # Rows of running trials are never filled in `values`; only the completed rows are
        # read further down.
        values: Union[numpy.ndarray, torch.Tensor] = numpy.empty(
            (n_trials, n_objectives), dtype=numpy.float64
        )
        params: Union[numpy.ndarray, torch.Tensor]
        con: Optional[Union[numpy.ndarray, torch.Tensor]] = None
        bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds
        params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
        # The loop variable is deliberately not named `trial` to avoid shadowing the argument.
        for trial_idx, observed in enumerate(trials):
            if observed.state == TrialState.COMPLETE:
                params[trial_idx] = trans.transform(observed.params)
                assert len(study.directions) == len(observed.values)
                for obj_idx, (direction, value) in enumerate(
                    zip(study.directions, observed.values)
                ):
                    assert value is not None
                    if (
                        direction == StudyDirection.MINIMIZE
                    ):  # BoTorch always assumes maximization.
                        value *= -1
                    values[trial_idx, obj_idx] = value
                if self._constraints_func is not None:
                    constraints = study._storage.get_trial_system_attrs(observed._trial_id).get(
                        _CONSTRAINTS_KEY
                    )
                    if constraints is not None:
                        n_constraints = len(constraints)

                        if con is None:
                            # Trials whose constraints are missing keep NaN in their row.
                            con = numpy.full(
                                (n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64
                            )
                        elif n_constraints != con.shape[1]:
                            raise RuntimeError(
                                f"Expected {con.shape[1]} constraints "
                                f"but received {n_constraints}."
                            )
                        con[trial_idx] = constraints
            elif observed.state == TrialState.RUNNING:
                if all(p in observed.params for p in search_space):
                    params[trial_idx] = trans.transform(observed.params)
                else:
                    # Mark running trials that lack some parameters; they are dropped below.
                    params[trial_idx] = numpy.nan
            else:
                assert False, "trial.state must be TrialState.COMPLETE or TrialState.RUNNING."

        if self._constraints_func is not None:
            if con is None:
                warnings.warn(
                    "`constraints_func` was given but no call to it correctly computed "
                    "constraints. Constraints passed to `candidates_func` will be `None`."
                )
            elif numpy.isnan(con).any():
                warnings.warn(
                    "`constraints_func` was given but some calls to it did not correctly compute "
                    "constraints. Constraints passed to `candidates_func` will contain NaN."
                )

        values = torch.from_numpy(values).to(self._device)
        params = torch.from_numpy(params).to(self._device)
        if con is not None:
            con = torch.from_numpy(con).to(self._device)
        bounds = torch.from_numpy(bounds).to(self._device)

        if con is not None:
            if con.dim() == 1:
                con.unsqueeze_(-1)
        # `trans.bounds` is (n_params, 2); candidates functions expect (2, n_params).
        bounds.transpose_(0, 1)

        if self._candidates_func is None:
            # The default is chosen once and then reused for all later calls.
            self._candidates_func = _get_default_candidates_func(
                n_objectives=n_objectives,
                has_constraint=con is not None,
                consider_running_trials=self._consider_running_trials,
            )

        completed_values = values[:n_completed_trials]
        completed_params = params[:n_completed_trials]
        if self._consider_running_trials:
            running_params = params[n_completed_trials:]
            # Keep only running trials for which every parameter is already known.
            running_params = running_params[~torch.isnan(running_params).any(dim=1)]
        else:
            running_params = None

        with manual_seed(self._seed):
            # `manual_seed` makes the default candidates functions reproducible.
            # `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is
            # deterministic when the BoTorch's seed is fixed.
            candidates = self._candidates_func(
                completed_params, completed_values, con, bounds, running_params
            )
            if self._seed is not None:
                # Advance the seed so that consecutive suggestions differ.
                self._seed += 1

        if not isinstance(candidates, torch.Tensor):
            raise TypeError("Candidates must be a torch.Tensor.")
        if candidates.dim() == 2:
            if candidates.size(0) != 1:
                raise ValueError(
                    "Candidates batch optimization is not supported and the first dimension must "
                    "have size 1 if candidates is a two-dimensional tensor. Actual: "
                    f"{candidates.size()}."
                )
            # Batch size is one. Get rid of the batch dimension.
            candidates = candidates.squeeze(0)
        if candidates.dim() != 1:
            raise ValueError("Candidates must be one or two-dimensional.")
        if candidates.size(0) != bounds.size(1):
            raise ValueError(
                "Candidates size must match with the given bounds. Actual candidates: "
                f"{candidates.size(0)}, bounds: {bounds.size(1)}."
            )

        return trans.untransform(candidates.cpu().numpy())

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Delegate the sampling of a single parameter to the independent sampler."""
        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )

    def reseed_rng(self) -> None:
        """Reseed the independent sampler and, if a seed was set, draw a fresh seed."""
        self._independent_sampler.reseed_rng()
        if self._seed is not None:
            self._seed = numpy.random.RandomState().randint(numpy.iinfo(numpy.int32).max)

    def before_trial(self, study: Study, trial: FrozenTrial) -> None:
        """Forward the hook to the independent sampler."""
        self._independent_sampler.before_trial(study, trial)

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Optional[Sequence[float]],
    ) -> None:
        """Process the trial's constraints (if configured) and forward the hook."""
        if self._constraints_func is not None:
            _process_constraints_after_trial(self._constraints_func, study, trial, state)
        self._independent_sampler.after_trial(study, trial, state, values)
--- tests/integration_tests/test_botorch.py | 559 ++++++++++++++++++++++++ tests/test_botorch.py | 559 ++++++++++++++++++++++++ 2 files changed, 1118 insertions(+) create mode 100644 tests/integration_tests/test_botorch.py create mode 100644 tests/test_botorch.py diff --git a/tests/integration_tests/test_botorch.py b/tests/integration_tests/test_botorch.py new file mode 100644 index 00000000..439c84ec --- /dev/null +++ b/tests/integration_tests/test_botorch.py @@ -0,0 +1,559 @@ +from typing import Any +from typing import Optional +from typing import Sequence +from typing import Tuple +from unittest.mock import patch +import warnings + +from packaging import version +import pytest + +import optuna +from optuna import integration +from optuna._imports import try_import +from optuna.integration import BoTorchSampler +from optuna.samplers import RandomSampler +from optuna.samplers._base import _CONSTRAINTS_KEY +from optuna.storages import RDBStorage +from optuna.trial import FrozenTrial +from optuna.trial import Trial +from optuna.trial import TrialState + + +with try_import() as _imports: + import botorch + import torch + +if not _imports.is_successful(): + from unittest.mock import MagicMock + + torch = MagicMock() # NOQA + +pytestmark = pytest.mark.integration + + +@pytest.mark.parametrize("n_objectives", [1, 2, 4]) +def test_botorch_candidates_func_none(n_objectives: int) -> None: + if n_objectives == 1 and version.parse(botorch.version.version) < version.parse("0.8.1"): + pytest.skip("botorch >=0.8.1 is required for logei_candidates_func.") + + n_trials = 3 + n_startup_trials = 2 + + sampler = BoTorchSampler(n_startup_trials=n_startup_trials) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + + # TODO(hvy): Do not check for the correct candidates function using private 
APIs. + if n_objectives == 1: + assert sampler._candidates_func is integration.botorch.logei_candidates_func + elif n_objectives == 2: + assert sampler._candidates_func is integration.botorch.qehvi_candidates_func + elif n_objectives == 4: + assert sampler._candidates_func is integration.botorch.ehvi_candidates_func + else: + assert False, "Should not reach." + + +def test_botorch_candidates_func() -> None: + candidates_func_call_count = 0 + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + assert train_con is None + + candidates = torch.rand(1) + + nonlocal candidates_func_call_count + candidates_func_call_count += 1 + + return candidates + + n_trials = 3 + n_startup_trials = 1 + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=n_startup_trials) + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=n_trials) + + assert len(study.trials) == n_trials + assert candidates_func_call_count == n_trials - n_startup_trials + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.ehvi_candidates_func, 4), + (integration.botorch.ehvi_candidates_func, 5), # alpha > 0 + (integration.botorch.logei_candidates_func, 1), + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qnei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qehvi_candidates_func, 7), # alpha > 0 + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 6), # alpha > 0 + ], +) +def test_botorch_specify_candidates_func(candidates_func: Any, n_objectives: int) -> None: + if candidates_func == integration.botorch.logei_candidates_func and version.parse( + 
botorch.version.version + ) < version.parse("0.8.1"): + pytest.skip("LogExpectedImprovement is not available in botorch <0.8.1.") + + n_trials = 4 + n_startup_trials = 2 + + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=n_startup_trials, + ) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.logei_candidates_func, 1), + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 + ], +) +def test_botorch_specify_candidates_func_constrained( + candidates_func: Any, n_objectives: int +) -> None: + n_trials = 4 + n_startup_trials = 2 + constraints_func_call_count = 0 + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + xs = sum(trial.params[f"x{i}"] for i in range(n_objectives)) + + nonlocal constraints_func_call_count + constraints_func_call_count += 1 + + return (xs - 0.5,) + + sampler = BoTorchSampler( + constraints_func=constraints_func, + candidates_func=candidates_func, + n_startup_trials=n_startup_trials, + ) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + assert constraints_func_call_count == n_trials + + +def test_botorch_candidates_func_invalid_batch_size() -> None: + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], 
+ ) -> torch.Tensor: + return torch.rand(2, 1) # Must have the batch size one, not two. + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_candidates_func_invalid_dimensionality() -> None: + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + return torch.rand(1, 1, 1) # Must have one or two dimensions, not three. + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_candidates_func_invalid_candidates_size() -> None: + n_params = 3 + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + return torch.rand(n_params - 1) # Must return candidates for all parameters. + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize( + lambda t: sum(t.suggest_float(f"x{i}", 0, 1) for i in range(n_params)), n_trials=3 + ) + + +def test_botorch_constraints_func_invalid_inconsistent_n_constraints() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + x0 = trial.params["x0"] + return [x0 - 0.5] * trial.number # Number of constraints may not change. 
+ + sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_constraints_func_raises() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + if trial.number == 1: + raise RuntimeError + return (0.0,) + + sampler = BoTorchSampler(constraints_func=constraints_func) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + assert len(study.trials) == 2 + + for trial in study.trials: + sys_con = trial.system_attrs[_CONSTRAINTS_KEY] + + expected_sys_con: Optional[Tuple[int]] + + if trial.number == 0: + expected_sys_con = (0,) + elif trial.number == 1: + expected_sys_con = None + else: + assert False, "Should not reach." + + assert sys_con == expected_sys_con + + +def test_botorch_constraints_func_nan_warning() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + if trial.number == 1: + raise RuntimeError + return (0.0,) + + last_trial_number_candidates_func = None + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + trial_number = train_x.size(0) + + assert train_con is not None + + if trial_number > 0: + assert not train_con[0, :].isnan().any() + if trial_number > 1: + assert train_con[1, :].isnan().all() + if trial_number > 2: + assert not train_con[2, :].isnan().any() + + nonlocal last_trial_number_candidates_func + last_trial_number_candidates_func = trial_number + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study = 
optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) + + assert len(study.trials) == 2 + + # Warns when `train_con` contains NaN. + with pytest.warns(UserWarning): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 4 + + assert last_trial_number_candidates_func == study.trials[-1].number + + +def test_botorch_constraints_func_none_warning() -> None: + candidates_func_call_count = 0 + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + raise RuntimeError + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + # `train_con` should be `None` if `constraints_func` always fails. + assert train_con is None + + nonlocal candidates_func_call_count + candidates_func_call_count += 1 + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) + + assert len(study.trials) == 1 + + # Warns when `train_con` becomes `None`. 
+ with pytest.warns(UserWarning), pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=1) + + assert len(study.trials) == 2 + + assert candidates_func_call_count == 1 + + +def test_botorch_constraints_func_late() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + return (0,) + + last_trial_number_candidates_func = None + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + trial_number = train_x.size(0) + + if trial_number < 3: + assert train_con is None + if trial_number == 3: + assert train_con is not None + assert train_con[:2, :].isnan().all() + assert not train_con[2, :].isnan().any() + + nonlocal last_trial_number_candidates_func + last_trial_number_candidates_func = trial_number + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=1, + ) + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 2 + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study.sampler = sampler + + # Warns when `train_con` contains NaN. Should not raise but will with NaN for previous trials + # that were not computed with constraints. 
+ with pytest.warns(UserWarning): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 4 + + assert last_trial_number_candidates_func == study.trials[-1].number + + +def test_botorch_n_startup_trials() -> None: + independent_sampler = RandomSampler() + sampler = BoTorchSampler(n_startup_trials=2, independent_sampler=independent_sampler) + study = optuna.create_study(directions=["minimize", "maximize"], sampler=sampler) + + with patch.object( + independent_sampler, "sample_independent", wraps=independent_sampler.sample_independent + ) as mock_independent, patch.object( + sampler, "sample_relative", wraps=sampler.sample_relative + ) as mock_relative: + study.optimize( + lambda t: [t.suggest_float("x0", 0, 1), t.suggest_float("x1", 0, 1)], n_trials=3 + ) + assert mock_independent.call_count == 4 # The objective function has two parameters. + assert mock_relative.call_count == 3 + + +def test_botorch_distributions() -> None: + def objective(trial: Trial) -> float: + x0 = trial.suggest_float("x0", 0, 1) + x1 = trial.suggest_float("x1", 0.1, 1, log=True) + x2 = trial.suggest_float("x2", 0, 1, step=0.1) + x3 = trial.suggest_int("x3", 0, 2) + x4 = trial.suggest_int("x4", 2, 4, log=True) + x5 = trial.suggest_int("x5", 0, 4, step=2) + x6 = trial.suggest_categorical("x6", [0.1, 0.2, 0.3]) + return x0 + x1 + x2 + x3 + x4 + x5 + x6 + + sampler = BoTorchSampler() + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(objective, n_trials=3) + + assert len(study.trials) == 3 + + +def test_botorch_invalid_different_studies() -> None: + # Using the same sampler with different studies should yield an error since the sampler is + # stateful holding the computed constraints. Two studies are considered different if their + # IDs differ. + # We use the RDB storage since this check does not work for the in-memory storage where all + # study IDs are identically 0. 
+ storage = RDBStorage("sqlite:///:memory:") + + sampler = BoTorchSampler() + + study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + other_study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) + with pytest.raises(RuntimeError): + other_study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_call_after_trial_of_independent_sampler() -> None: + independent_sampler = optuna.samplers.RandomSampler() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = BoTorchSampler(independent_sampler=independent_sampler) + study = optuna.create_study(sampler=sampler) + with patch.object( + independent_sampler, "after_trial", wraps=independent_sampler.after_trial + ) as mock_object: + study.optimize(lambda _: 1.0, n_trials=1) + assert mock_object.call_count == 1 + + +@pytest.mark.parametrize("device", [None, torch.device("cpu"), torch.device("cuda:0")]) +def test_device_argument(device: Optional[torch.device]) -> None: + sampler = BoTorchSampler(device=device) + if not torch.cuda.is_available() and sampler._device.type == "cuda": + pytest.skip(reason="GPU is unavailable.") + + def objective(trial: Trial) -> float: + return trial.suggest_float("x", 0.0, 1.0) + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + x0 = trial.params["x"] + return [x0 - 0.5] + + sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=3) + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 + ], 
+) +def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None: + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=1, + consider_running_trials=True, + ) + + def objective(trial: Trial) -> Sequence[float]: + ret = [] + for i in range(n_objectives): + val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2)) + ret.append(val) + return ret + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize(objective, n_trials=2) + assert len(study.trials) == 2 + + # fully suggested running trial + running_trial_full = study.ask() + _ = objective(running_trial_full) + study.optimize(objective, n_trials=1) + assert len(study.trials) == 4 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3 + + # partially suggested running trial + running_trial_partial = study.ask() + for i in range(n_objectives): + running_trial_partial.suggest_float(f"x{i}_0", 0, 1) + study.optimize(objective, n_trials=1) + assert len(study.trials) == 6 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4 + + # not suggested running trial + _ = study.ask() + study.optimize(objective, n_trials=1) + assert len(study.trials) == 8 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5 diff --git a/tests/test_botorch.py b/tests/test_botorch.py new file mode 100644 index 00000000..439c84ec --- /dev/null +++ b/tests/test_botorch.py @@ -0,0 +1,559 @@ +from typing import Any +from typing import Optional +from typing import Sequence +from typing import Tuple +from unittest.mock import patch +import warnings + +from packaging import version +import pytest + +import optuna +from optuna import integration +from optuna._imports import 
try_import +from optuna.integration import BoTorchSampler +from optuna.samplers import RandomSampler +from optuna.samplers._base import _CONSTRAINTS_KEY +from optuna.storages import RDBStorage +from optuna.trial import FrozenTrial +from optuna.trial import Trial +from optuna.trial import TrialState + + +with try_import() as _imports: + import botorch + import torch + +if not _imports.is_successful(): + from unittest.mock import MagicMock + + torch = MagicMock() # NOQA + +pytestmark = pytest.mark.integration + + +@pytest.mark.parametrize("n_objectives", [1, 2, 4]) +def test_botorch_candidates_func_none(n_objectives: int) -> None: + if n_objectives == 1 and version.parse(botorch.version.version) < version.parse("0.8.1"): + pytest.skip("botorch >=0.8.1 is required for logei_candidates_func.") + + n_trials = 3 + n_startup_trials = 2 + + sampler = BoTorchSampler(n_startup_trials=n_startup_trials) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + + # TODO(hvy): Do not check for the correct candidates function using private APIs. + if n_objectives == 1: + assert sampler._candidates_func is integration.botorch.logei_candidates_func + elif n_objectives == 2: + assert sampler._candidates_func is integration.botorch.qehvi_candidates_func + elif n_objectives == 4: + assert sampler._candidates_func is integration.botorch.ehvi_candidates_func + else: + assert False, "Should not reach." 
+ + +def test_botorch_candidates_func() -> None: + candidates_func_call_count = 0 + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + assert train_con is None + + candidates = torch.rand(1) + + nonlocal candidates_func_call_count + candidates_func_call_count += 1 + + return candidates + + n_trials = 3 + n_startup_trials = 1 + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=n_startup_trials) + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=n_trials) + + assert len(study.trials) == n_trials + assert candidates_func_call_count == n_trials - n_startup_trials + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.ehvi_candidates_func, 4), + (integration.botorch.ehvi_candidates_func, 5), # alpha > 0 + (integration.botorch.logei_candidates_func, 1), + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qnei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qehvi_candidates_func, 7), # alpha > 0 + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 6), # alpha > 0 + ], +) +def test_botorch_specify_candidates_func(candidates_func: Any, n_objectives: int) -> None: + if candidates_func == integration.botorch.logei_candidates_func and version.parse( + botorch.version.version + ) < version.parse("0.8.1"): + pytest.skip("LogExpectedImprovement is not available in botorch <0.8.1.") + + n_trials = 4 + n_startup_trials = 2 + + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=n_startup_trials, + ) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + 
lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.logei_candidates_func, 1), + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 + ], +) +def test_botorch_specify_candidates_func_constrained( + candidates_func: Any, n_objectives: int +) -> None: + n_trials = 4 + n_startup_trials = 2 + constraints_func_call_count = 0 + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + xs = sum(trial.params[f"x{i}"] for i in range(n_objectives)) + + nonlocal constraints_func_call_count + constraints_func_call_count += 1 + + return (xs - 0.5,) + + sampler = BoTorchSampler( + constraints_func=constraints_func, + candidates_func=candidates_func, + n_startup_trials=n_startup_trials, + ) + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize( + lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials + ) + + assert len(study.trials) == n_trials + assert constraints_func_call_count == n_trials + + +def test_botorch_candidates_func_invalid_batch_size() -> None: + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + return torch.rand(2, 1) # Must have the batch size one, not two. 
+ + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_candidates_func_invalid_dimensionality() -> None: + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + return torch.rand(1, 1, 1) # Must have one or two dimensions, not three. + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_candidates_func_invalid_candidates_size() -> None: + n_params = 3 + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + return torch.rand(n_params - 1) # Must return candidates for all parameters. + + sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(ValueError): + study.optimize( + lambda t: sum(t.suggest_float(f"x{i}", 0, 1) for i in range(n_params)), n_trials=3 + ) + + +def test_botorch_constraints_func_invalid_inconsistent_n_constraints() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + x0 = trial.params["x0"] + return [x0 - 0.5] * trial.number # Number of constraints may not change. 
+ + sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_botorch_constraints_func_raises() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + if trial.number == 1: + raise RuntimeError + return (0.0,) + + sampler = BoTorchSampler(constraints_func=constraints_func) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + assert len(study.trials) == 2 + + for trial in study.trials: + sys_con = trial.system_attrs[_CONSTRAINTS_KEY] + + expected_sys_con: Optional[Tuple[int]] + + if trial.number == 0: + expected_sys_con = (0,) + elif trial.number == 1: + expected_sys_con = None + else: + assert False, "Should not reach." + + assert sys_con == expected_sys_con + + +def test_botorch_constraints_func_nan_warning() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + if trial.number == 1: + raise RuntimeError + return (0.0,) + + last_trial_number_candidates_func = None + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + trial_number = train_x.size(0) + + assert train_con is not None + + if trial_number > 0: + assert not train_con[0, :].isnan().any() + if trial_number > 1: + assert train_con[1, :].isnan().all() + if trial_number > 2: + assert not train_con[2, :].isnan().any() + + nonlocal last_trial_number_candidates_func + last_trial_number_candidates_func = trial_number + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study = 
optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) + + assert len(study.trials) == 2 + + # Warns when `train_con` contains NaN. + with pytest.warns(UserWarning): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 4 + + assert last_trial_number_candidates_func == study.trials[-1].number + + +def test_botorch_constraints_func_none_warning() -> None: + candidates_func_call_count = 0 + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + raise RuntimeError + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + # `train_con` should be `None` if `constraints_func` always fails. + assert train_con is None + + nonlocal candidates_func_call_count + candidates_func_call_count += 1 + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study = optuna.create_study(direction="minimize", sampler=sampler) + + with pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) + + assert len(study.trials) == 1 + + # Warns when `train_con` becomes `None`. 
+ with pytest.warns(UserWarning), pytest.raises(RuntimeError): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=1) + + assert len(study.trials) == 2 + + assert candidates_func_call_count == 1 + + +def test_botorch_constraints_func_late() -> None: + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + return (0,) + + last_trial_number_candidates_func = None + + def candidates_func( + train_x: torch.Tensor, + train_obj: torch.Tensor, + train_con: Optional[torch.Tensor], + bounds: torch.Tensor, + running_x: Optional[torch.Tensor], + ) -> torch.Tensor: + trial_number = train_x.size(0) + + if trial_number < 3: + assert train_con is None + if trial_number == 3: + assert train_con is not None + assert train_con[:2, :].isnan().all() + assert not train_con[2, :].isnan().any() + + nonlocal last_trial_number_candidates_func + last_trial_number_candidates_func = trial_number + + return torch.rand(1) + + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=1, + ) + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 2 + + sampler = BoTorchSampler( + candidates_func=candidates_func, + constraints_func=constraints_func, + n_startup_trials=1, + ) + + study.sampler = sampler + + # Warns when `train_con` contains NaN. Should not raise but will with NaN for previous trials + # that were not computed with constraints. 
+ with pytest.warns(UserWarning): + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) + + assert len(study.trials) == 4 + + assert last_trial_number_candidates_func == study.trials[-1].number + + +def test_botorch_n_startup_trials() -> None: + independent_sampler = RandomSampler() + sampler = BoTorchSampler(n_startup_trials=2, independent_sampler=independent_sampler) + study = optuna.create_study(directions=["minimize", "maximize"], sampler=sampler) + + with patch.object( + independent_sampler, "sample_independent", wraps=independent_sampler.sample_independent + ) as mock_independent, patch.object( + sampler, "sample_relative", wraps=sampler.sample_relative + ) as mock_relative: + study.optimize( + lambda t: [t.suggest_float("x0", 0, 1), t.suggest_float("x1", 0, 1)], n_trials=3 + ) + assert mock_independent.call_count == 4 # The objective function has two parameters. + assert mock_relative.call_count == 3 + + +def test_botorch_distributions() -> None: + def objective(trial: Trial) -> float: + x0 = trial.suggest_float("x0", 0, 1) + x1 = trial.suggest_float("x1", 0.1, 1, log=True) + x2 = trial.suggest_float("x2", 0, 1, step=0.1) + x3 = trial.suggest_int("x3", 0, 2) + x4 = trial.suggest_int("x4", 2, 4, log=True) + x5 = trial.suggest_int("x5", 0, 4, step=2) + x6 = trial.suggest_categorical("x6", [0.1, 0.2, 0.3]) + return x0 + x1 + x2 + x3 + x4 + x5 + x6 + + sampler = BoTorchSampler() + + study = optuna.create_study(direction="minimize", sampler=sampler) + study.optimize(objective, n_trials=3) + + assert len(study.trials) == 3 + + +def test_botorch_invalid_different_studies() -> None: + # Using the same sampler with different studies should yield an error since the sampler is + # stateful holding the computed constraints. Two studies are considered different if their + # IDs differ. + # We use the RDB storage since this check does not work for the in-memory storage where all + # study IDs are identically 0. 
+ storage = RDBStorage("sqlite:///:memory:") + + sampler = BoTorchSampler() + + study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) + study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + other_study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) + with pytest.raises(RuntimeError): + other_study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) + + +def test_call_after_trial_of_independent_sampler() -> None: + independent_sampler = optuna.samplers.RandomSampler() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = BoTorchSampler(independent_sampler=independent_sampler) + study = optuna.create_study(sampler=sampler) + with patch.object( + independent_sampler, "after_trial", wraps=independent_sampler.after_trial + ) as mock_object: + study.optimize(lambda _: 1.0, n_trials=1) + assert mock_object.call_count == 1 + + +@pytest.mark.parametrize("device", [None, torch.device("cpu"), torch.device("cuda:0")]) +def test_device_argument(device: Optional[torch.device]) -> None: + sampler = BoTorchSampler(device=device) + if not torch.cuda.is_available() and sampler._device.type == "cuda": + pytest.skip(reason="GPU is unavailable.") + + def objective(trial: Trial) -> float: + return trial.suggest_float("x", 0.0, 1.0) + + def constraints_func(trial: FrozenTrial) -> Sequence[float]: + x0 = trial.params["x"] + return [x0 - 0.5] + + sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=3) + + +@pytest.mark.parametrize( + "candidates_func, n_objectives", + [ + (integration.botorch.qei_candidates_func, 1), + (integration.botorch.qehvi_candidates_func, 2), + (integration.botorch.qparego_candidates_func, 4), + (integration.botorch.qnehvi_candidates_func, 2), + (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 + ], 
+) +def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None: + sampler = BoTorchSampler( + candidates_func=candidates_func, + n_startup_trials=1, + consider_running_trials=True, + ) + + def objective(trial: Trial) -> Sequence[float]: + ret = [] + for i in range(n_objectives): + val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2)) + ret.append(val) + return ret + + study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) + study.optimize(objective, n_trials=2) + assert len(study.trials) == 2 + + # fully suggested running trial + running_trial_full = study.ask() + _ = objective(running_trial_full) + study.optimize(objective, n_trials=1) + assert len(study.trials) == 4 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3 + + # partially suggested running trial + running_trial_partial = study.ask() + for i in range(n_objectives): + running_trial_partial.suggest_float(f"x{i}_0", 0, 1) + study.optimize(objective, n_trials=1) + assert len(study.trials) == 6 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4 + + # not suggested running trial + _ = study.ask() + study.optimize(objective, n_trials=1) + assert len(study.trials) == 8 + assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3 + assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5 From 12a6520bd9683c1f1396d0952d08a74a2a15403c Mon Sep 17 00:00:00 2001 From: y0z Date: Tue, 6 Feb 2024 19:32:50 +0900 Subject: [PATCH 3/7] Add BoTorch integration. 
--- README.md | 1 + docs/source/reference/index.rst | 16 + optuna_integration/__init__.py | 3 + pyproject.toml | 3 + tests/test_samplers.py | 792 ++++++++++++++++++++++++++++++++ 5 files changed, 815 insertions(+) create mode 100644 tests/test_samplers.py diff --git a/README.md b/README.md index 92c1a086..01d8a85a 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ integrated with many useful tools like PyTorch, sklearn, TensorFlow, etc. Optuna-Integration API reference is [here](https://optuna-integration.readthedocs.io/en/stable/reference/index.html). * [AllenNLP](https://optuna-integration.readthedocs.io/en/stable/reference/index.html#allennlp) ([example](https://github.com/optuna/optuna-examples/tree/main/allennlp)) +* [BoTorch](https://optuna-integration.readthedocs.io/en/stable/reference/index.html#botorch) * [Catalyst](https://optuna-integration.readthedocs.io/en/stable/reference/index.html#catalyst) ([example](https://github.com/optuna/optuna-examples/blob/main/pytorch/catalyst_simple.py)) * [CatBoost](https://optuna-integration.readthedocs.io/en/stable/reference/index.html#catboost) ([example](https://github.com/optuna/optuna-examples/blob/main/catboost/catboost_pruning.py)) * [Chainer](https://optuna-integration.readthedocs.io/en/stable/reference/index.html#chainer) ([example](https://github.com/optuna/optuna-examples/tree/main/chainer/chainer_integration.py)) diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 1cdd6629..687985db 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -27,6 +27,22 @@ AllenNLP optuna_integration.allennlp.dump_best_config optuna_integration.AllenNLPPruningCallback +BoTorch +------- + +.. 
autosummary:: + :toctree: generated/ + :nosignatures: + + optuna_integration.BoTorchSampler + optuna_integration.botorch.ehvi_candidates_func + optuna_integration.botorch.logei_candidates_func + optuna_integration.botorch.qei_candidates_func + optuna_integration.botorch.qnei_candidates_func + optuna_integration.botorch.qehvi_candidates_func + optuna_integration.botorch.qnehvi_candidates_func + optuna_integration.botorch.qparego_candidates_func + Catalyst -------- diff --git a/optuna_integration/__init__.py b/optuna_integration/__init__.py index 10d47ef6..487b63a0 100644 --- a/optuna_integration/__init__.py +++ b/optuna_integration/__init__.py @@ -7,6 +7,7 @@ _import_structure = { "allennlp": ["AllenNLPExecutor", "AllenNLPPruningCallback"], + "botorch": ["BoTorchSampler"], "catalyst": ["CatalystPruningCallback"], "catboost": ["CatBoostPruningCallback"], "chainer": ["ChainerPruningExtension"], @@ -27,6 +28,7 @@ if TYPE_CHECKING: from optuna_integration.allennlp import AllenNLPExecutor from optuna_integration.allennlp import AllenNLPPruningCallback + from optuna_integration.botorch import BoTorchSampler from optuna_integration.catalyst import CatalystPruningCallback from optuna_integration.catboost import CatBoostPruningCallback from optuna_integration.chainer import ChainerPruningExtension @@ -92,6 +94,7 @@ def _get_module(self, module_name: str) -> ModuleType: __all__ = [ "AllenNLPExecutor", "AllenNLPPruningCallback", + "BoTorchSampler", "CatalystPruningCallback", "CatBoostPruningCallback", "ChainerMNStudy", diff --git a/pyproject.toml b/pyproject.toml index fc4403ed..a102809f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,11 +55,13 @@ document = [ "sphinx_rtd_theme", ] all = [ + "botorch", "catalyst", "catboost>=0.26; sys_platform!='darwin'", "catboost>=0.26,<1.2; sys_platform=='darwin'", "distributed", "fastai", + "gpytorch", "mxnet", "pandas", "shap", @@ -68,6 +70,7 @@ all = [ "skorch", "tensorboard", "tensorflow", + "torch", ] 
[tool.setuptools.packages.find] diff --git a/tests/test_samplers.py b/tests/test_samplers.py new file mode 100644 index 00000000..4977e3ff --- /dev/null +++ b/tests/test_samplers.py @@ -0,0 +1,792 @@ +from __future__ import annotations + +from collections.abc import Callable +from collections.abc import Sequence +import multiprocessing +from multiprocessing.managers import DictProxy +import os +from typing import Any +from unittest.mock import patch +import warnings + +from _pytest.fixtures import SubRequest +from _pytest.mark.structures import MarkDecorator +import numpy as np +import optuna +from optuna.distributions import BaseDistribution +from optuna.distributions import CategoricalChoiceType +from optuna.distributions import CategoricalDistribution +from optuna.distributions import FloatDistribution +from optuna.distributions import IntDistribution +from optuna.integration.botorch import logei_candidates_func +from optuna.integration.botorch import qei_candidates_func +from optuna.samplers import BaseSampler +from optuna.samplers._lazy_random_state import LazyRandomState +from optuna.study import Study +from optuna.trial import FrozenTrial +from optuna.trial import Trial +from optuna.trial import TrialState +import pytest + + +parametrize_sampler = pytest.mark.parametrize( + "sampler_class", + [ + pytest.param( + lambda: optuna.integration.BoTorchSampler( + n_startup_trials=0, + candidates_func=logei_candidates_func, + ), + marks=pytest.mark.integration, + ), + pytest.param( + lambda: optuna.integration.BoTorchSampler( + n_startup_trials=0, + candidates_func=qei_candidates_func, + ), + marks=pytest.mark.integration, + ), + ], +) +parametrize_relative_sampler = pytest.mark.parametrize( + "relative_sampler_class", + [], +) +parametrize_multi_objective_sampler = pytest.mark.parametrize( + "multi_objective_sampler_class", + [ + pytest.param( + lambda: optuna.integration.BoTorchSampler(n_startup_trials=0), + marks=pytest.mark.integration, + ), + ], +) + + 
+sampler_class_with_seed: dict[str, tuple[Callable[[int], BaseSampler], bool]] = { + "BoTorchSampler": (lambda seed: optuna.integration.BoTorchSampler(seed=seed), True), +} +param_sampler_with_seed = [] +param_sampler_name_with_seed = [] +for sampler_name, (sampler_class, integration_flag) in sampler_class_with_seed.items(): + if integration_flag: + param_sampler_with_seed.append( + pytest.param(sampler_class, id=sampler_name, marks=pytest.mark.integration) + ) + param_sampler_name_with_seed.append( + pytest.param(sampler_name, marks=pytest.mark.integration) + ) + else: + param_sampler_with_seed.append(pytest.param(sampler_class, id=sampler_name)) + param_sampler_name_with_seed.append(pytest.param(sampler_name)) +parametrize_sampler_with_seed = pytest.mark.parametrize("sampler_class", param_sampler_with_seed) +parametrize_sampler_name_with_seed = pytest.mark.parametrize( + "sampler_name", param_sampler_name_with_seed +) + + +@pytest.mark.parametrize( + "sampler_class,expected_has_rng,expected_has_another_sampler", + [ + pytest.param( + lambda: optuna.integration.BoTorchSampler(n_startup_trials=0), + False, + True, + marks=pytest.mark.integration, + ), + ], +) +def test_sampler_reseed_rng( + sampler_class: Callable[[], BaseSampler], + expected_has_rng: bool, + expected_has_another_sampler: bool, +) -> None: + def _extract_attr_name_from_sampler_by_cls(sampler: BaseSampler, cls: Any) -> str | None: + for name, attr in sampler.__dict__.items(): + if isinstance(attr, cls): + return name + return None + + sampler = sampler_class() + + rng_name = _extract_attr_name_from_sampler_by_cls(sampler, LazyRandomState) + has_rng = rng_name is not None + assert expected_has_rng == has_rng + if has_rng: + rng_name = str(rng_name) + original_random_state = sampler.__dict__[rng_name].rng.get_state() + sampler.reseed_rng() + random_state = sampler.__dict__[rng_name].rng.get_state() + if not isinstance(sampler, optuna.samplers.CmaEsSampler): + assert str(original_random_state) != 
str(random_state) + else: + # CmaEsSampler has a RandomState that is not reseed by its reseed_rng method. + assert str(original_random_state) == str(random_state) + + had_sampler_name = _extract_attr_name_from_sampler_by_cls(sampler, BaseSampler) + has_another_sampler = had_sampler_name is not None + assert expected_has_another_sampler == has_another_sampler + + if has_another_sampler: + had_sampler_name = str(had_sampler_name) + had_sampler = sampler.__dict__[had_sampler_name] + had_sampler_rng_name = _extract_attr_name_from_sampler_by_cls(had_sampler, LazyRandomState) + original_had_sampler_random_state = had_sampler.__dict__[ + had_sampler_rng_name + ].rng.get_state() + with patch.object( + had_sampler, + "reseed_rng", + wraps=had_sampler.reseed_rng, + ) as mock_object: + sampler.reseed_rng() + assert mock_object.call_count == 1 + + had_sampler = sampler.__dict__[had_sampler_name] + had_sampler_random_state = had_sampler.__dict__[had_sampler_rng_name].rng.get_state() + assert str(original_had_sampler_random_state) != str(had_sampler_random_state) + + +def parametrize_suggest_method(name: str) -> MarkDecorator: + return pytest.mark.parametrize( + f"suggest_method_{name}", + [ + lambda t: t.suggest_float(name, 0, 10), + lambda t: t.suggest_int(name, 0, 10), + lambda t: t.suggest_categorical(name, [0, 1, 2]), + lambda t: t.suggest_float(name, 0, 10, step=0.5), + lambda t: t.suggest_float(name, 1e-7, 10, log=True), + lambda t: t.suggest_int(name, 1, 10, log=True), + ], + ) + + +@pytest.mark.parametrize( + "sampler_class", + [], +) +def test_raise_error_for_samplers_during_multi_objectives( + sampler_class: Callable[[], BaseSampler] +) -> None: + study = optuna.study.create_study(directions=["maximize", "maximize"], sampler=sampler_class()) + + distribution = FloatDistribution(0.0, 1.0) + with pytest.raises(ValueError): + study.sampler.sample_independent(study, _create_new_trial(study), "x", distribution) + + with pytest.raises(ValueError): + trial = 
_create_new_trial(study) + study.sampler.sample_relative( + study, trial, study.sampler.infer_relative_search_space(study, trial) + ) + + +@parametrize_sampler +@pytest.mark.parametrize( + "distribution", + [ + FloatDistribution(-1.0, 1.0), + FloatDistribution(0.0, 1.0), + FloatDistribution(-1.0, 0.0), + FloatDistribution(1e-7, 1.0, log=True), + FloatDistribution(-10, 10, step=0.1), + FloatDistribution(-10.2, 10.2, step=0.1), + ], +) +def test_float( + sampler_class: Callable[[], BaseSampler], + distribution: FloatDistribution, +) -> None: + study = optuna.study.create_study(sampler=sampler_class()) + points = np.array( + [ + study.sampler.sample_independent(study, _create_new_trial(study), "x", distribution) + for _ in range(100) + ] + ) + assert np.all(points >= distribution.low) + assert np.all(points <= distribution.high) + assert not isinstance( + study.sampler.sample_independent(study, _create_new_trial(study), "x", distribution), + np.floating, + ) + + if distribution.step is not None: + # Check all points are multiples of distribution.step. 
+ points -= distribution.low + points /= distribution.step + round_points = np.round(points) + np.testing.assert_almost_equal(round_points, points) + + +@parametrize_sampler +@pytest.mark.parametrize( + "distribution", + [ + IntDistribution(-10, 10), + IntDistribution(0, 10), + IntDistribution(-10, 0), + IntDistribution(-10, 10, step=2), + IntDistribution(0, 10, step=2), + IntDistribution(-10, 0, step=2), + IntDistribution(1, 100, log=True), + ], +) +def test_int(sampler_class: Callable[[], BaseSampler], distribution: IntDistribution) -> None: + study = optuna.study.create_study(sampler=sampler_class()) + points = np.array( + [ + study.sampler.sample_independent(study, _create_new_trial(study), "x", distribution) + for _ in range(100) + ] + ) + assert np.all(points >= distribution.low) + assert np.all(points <= distribution.high) + assert not isinstance( + study.sampler.sample_independent(study, _create_new_trial(study), "x", distribution), + np.integer, + ) + + +@parametrize_sampler +@pytest.mark.parametrize("choices", [(1, 2, 3), ("a", "b", "c"), (1, "a")]) +def test_categorical( + sampler_class: Callable[[], BaseSampler], choices: Sequence[CategoricalChoiceType] +) -> None: + distribution = CategoricalDistribution(choices) + + study = optuna.study.create_study(sampler=sampler_class()) + + def sample() -> float: + trial = _create_new_trial(study) + param_value = study.sampler.sample_independent(study, trial, "x", distribution) + return float(distribution.to_internal_repr(param_value)) + + points = np.asarray([sample() for i in range(100)]) + + # 'x' value is corresponding to an index of distribution.choices. 
+ assert np.all(points >= 0) + assert np.all(points <= len(distribution.choices) - 1) + round_points = np.round(points) + np.testing.assert_almost_equal(round_points, points) + + +@parametrize_relative_sampler +@pytest.mark.parametrize( + "x_distribution", + [ + FloatDistribution(-1.0, 1.0), + FloatDistribution(1e-7, 1.0, log=True), + FloatDistribution(-10, 10, step=0.5), + IntDistribution(3, 10), + IntDistribution(1, 100, log=True), + IntDistribution(3, 9, step=2), + ], +) +@pytest.mark.parametrize( + "y_distribution", + [ + FloatDistribution(-1.0, 1.0), + FloatDistribution(1e-7, 1.0, log=True), + FloatDistribution(-10, 10, step=0.5), + IntDistribution(3, 10), + IntDistribution(1, 100, log=True), + IntDistribution(3, 9, step=2), + ], +) +def test_sample_relative_numerical( + relative_sampler_class: Callable[[], BaseSampler], + x_distribution: BaseDistribution, + y_distribution: BaseDistribution, +) -> None: + search_space: dict[str, BaseDistribution] = dict(x=x_distribution, y=y_distribution) + study = optuna.study.create_study(sampler=relative_sampler_class()) + trial = study.ask(search_space) + study.tell(trial, sum(trial.params.values())) + + def sample() -> list[int | float]: + params = study.sampler.sample_relative(study, _create_new_trial(study), search_space) + return [params[name] for name in search_space] + + points = np.array([sample() for _ in range(10)]) + for i, distribution in enumerate(search_space.values()): + assert isinstance( + distribution, + ( + FloatDistribution, + IntDistribution, + ), + ) + assert np.all(points[:, i] >= distribution.low) + assert np.all(points[:, i] <= distribution.high) + for param_value, distribution in zip(sample(), search_space.values()): + assert not isinstance(param_value, np.floating) + assert not isinstance(param_value, np.integer) + if isinstance(distribution, IntDistribution): + assert isinstance(param_value, int) + else: + assert isinstance(param_value, float) + + +@parametrize_relative_sampler +def 
test_sample_relative_categorical(relative_sampler_class: Callable[[], BaseSampler]) -> None: + search_space: dict[str, BaseDistribution] = dict( + x=CategoricalDistribution([1, 10, 100]), y=CategoricalDistribution([-1, -10, -100]) + ) + study = optuna.study.create_study(sampler=relative_sampler_class()) + trial = study.ask(search_space) + study.tell(trial, sum(trial.params.values())) + + def sample() -> list[float]: + params = study.sampler.sample_relative(study, _create_new_trial(study), search_space) + return [params[name] for name in search_space] + + points = np.array([sample() for _ in range(10)]) + for i, distribution in enumerate(search_space.values()): + assert isinstance(distribution, CategoricalDistribution) + assert np.all([v in distribution.choices for v in points[:, i]]) + for param_value in sample(): + assert not isinstance(param_value, np.floating) + assert not isinstance(param_value, np.integer) + assert isinstance(param_value, int) + + +@parametrize_relative_sampler +@pytest.mark.parametrize( + "x_distribution", + [ + FloatDistribution(-1.0, 1.0), + FloatDistribution(1e-7, 1.0, log=True), + FloatDistribution(-10, 10, step=0.5), + IntDistribution(1, 10), + IntDistribution(1, 100, log=True), + ], +) +def test_sample_relative_mixed( + relative_sampler_class: Callable[[], BaseSampler], x_distribution: BaseDistribution +) -> None: + search_space: dict[str, BaseDistribution] = dict( + x=x_distribution, y=CategoricalDistribution([-1, -10, -100]) + ) + study = optuna.study.create_study(sampler=relative_sampler_class()) + trial = study.ask(search_space) + study.tell(trial, sum(trial.params.values())) + + def sample() -> list[float]: + params = study.sampler.sample_relative(study, _create_new_trial(study), search_space) + return [params[name] for name in search_space] + + points = np.array([sample() for _ in range(10)]) + assert isinstance( + search_space["x"], + ( + FloatDistribution, + IntDistribution, + ), + ) + assert np.all(points[:, 0] >= 
search_space["x"].low) + assert np.all(points[:, 0] <= search_space["x"].high) + assert isinstance(search_space["y"], CategoricalDistribution) + assert np.all([v in search_space["y"].choices for v in points[:, 1]]) + for param_value, distribution in zip(sample(), search_space.values()): + assert not isinstance(param_value, np.floating) + assert not isinstance(param_value, np.integer) + if isinstance( + distribution, + ( + IntDistribution, + CategoricalDistribution, + ), + ): + assert isinstance(param_value, int) + else: + assert isinstance(param_value, float) + + +@parametrize_sampler +def test_conditional_sample_independent(sampler_class: Callable[[], BaseSampler]) -> None: + # This test case reproduces the error reported in #2734. + # See https://github.com/optuna/optuna/pull/2734#issuecomment-857649769. + + study = optuna.study.create_study(sampler=sampler_class()) + categorical_distribution = CategoricalDistribution(choices=["x", "y"]) + dependent_distribution = CategoricalDistribution(choices=["a", "b"]) + + study.add_trial( + optuna.create_trial( + params={"category": "x", "x": "a"}, + distributions={"category": categorical_distribution, "x": dependent_distribution}, + value=0.1, + ) + ) + + study.add_trial( + optuna.create_trial( + params={"category": "y", "y": "b"}, + distributions={"category": categorical_distribution, "y": dependent_distribution}, + value=0.1, + ) + ) + + _trial = _create_new_trial(study) + category = study.sampler.sample_independent( + study, _trial, "category", categorical_distribution + ) + assert category in ["x", "y"] + value = study.sampler.sample_independent(study, _trial, category, dependent_distribution) + assert value in ["a", "b"] + + +def _create_new_trial(study: Study) -> FrozenTrial: + trial_id = study._storage.create_new_trial(study._study_id) + return study._storage.get_trial(trial_id) + + +@parametrize_sampler +def test_nan_objective_value(sampler_class: Callable[[], BaseSampler]) -> None: + study = 
optuna.create_study(sampler=sampler_class()) + + def objective(trial: Trial, base_value: float) -> float: + return trial.suggest_float("x", 0.1, 0.2) + base_value + + # Non NaN objective values. + for i in range(10, 1, -1): + study.optimize(lambda t: objective(t, i), n_trials=1, catch=()) + assert int(study.best_value) == 2 + + # NaN objective values. + study.optimize(lambda t: objective(t, float("nan")), n_trials=1, catch=()) + assert int(study.best_value) == 2 + + # Non NaN objective value. + study.optimize(lambda t: objective(t, 1), n_trials=1, catch=()) + assert int(study.best_value) == 1 + + +@parametrize_sampler +def test_partial_fixed_sampling(sampler_class: Callable[[], BaseSampler]) -> None: + study = optuna.create_study(sampler=sampler_class()) + + def objective(trial: Trial) -> float: + x = trial.suggest_float("x", -1, 1) + y = trial.suggest_int("y", -1, 1) + z = trial.suggest_float("z", -1, 1) + return x + y + z + + # First trial. + study.optimize(objective, n_trials=1) + + # Second trial. Here, the parameter ``y`` is fixed as 0. 
+ fixed_params = {"y": 0} + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + study.sampler = optuna.samplers.PartialFixedSampler(fixed_params, study.sampler) + study.optimize(objective, n_trials=1) + trial_params = study.trials[-1].params + assert trial_params["y"] == fixed_params["y"] + + +@parametrize_multi_objective_sampler +@pytest.mark.parametrize( + "distribution", + [ + FloatDistribution(-1.0, 1.0), + FloatDistribution(0.0, 1.0), + FloatDistribution(-1.0, 0.0), + FloatDistribution(1e-7, 1.0, log=True), + FloatDistribution(-10, 10, step=0.1), + FloatDistribution(-10.2, 10.2, step=0.1), + IntDistribution(-10, 10), + IntDistribution(0, 10), + IntDistribution(-10, 0), + IntDistribution(-10, 10, step=2), + IntDistribution(0, 10, step=2), + IntDistribution(-10, 0, step=2), + IntDistribution(1, 100, log=True), + CategoricalDistribution((1, 2, 3)), + CategoricalDistribution(("a", "b", "c")), + CategoricalDistribution((1, "a")), + ], +) +def test_multi_objective_sample_independent( + multi_objective_sampler_class: Callable[[], BaseSampler], distribution: BaseDistribution +) -> None: + study = optuna.study.create_study( + directions=["minimize", "maximize"], sampler=multi_objective_sampler_class() + ) + for i in range(100): + value = study.sampler.sample_independent( + study, _create_new_trial(study), "x", distribution + ) + assert distribution._contains(distribution.to_internal_repr(value)) + + if not isinstance(distribution, CategoricalDistribution): + # Please see https://github.com/optuna/optuna/pull/393 why this assertion is needed. + assert not isinstance(value, np.floating) + + if isinstance(distribution, FloatDistribution): + if distribution.step is not None: + # Check the value is a multiple of `distribution.step` which is + # the quantization interval of the distribution. 
+ value -= distribution.low + value /= distribution.step + round_value = np.round(value) + np.testing.assert_almost_equal(round_value, value) + + +@parametrize_sampler +def test_sample_single_distribution(sampler_class: Callable[[], BaseSampler]) -> None: + relative_search_space = { + "a": CategoricalDistribution([1]), + "b": IntDistribution(low=1, high=1), + "c": IntDistribution(low=1, high=1, log=True), + "d": FloatDistribution(low=1.0, high=1.0), + "e": FloatDistribution(low=1.0, high=1.0, log=True), + "f": FloatDistribution(low=1.0, high=1.0, step=1.0), + } + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = sampler_class() + study = optuna.study.create_study(sampler=sampler) + + # We need to test the construction of the model, so we should set `n_trials >= 2`. + for _ in range(2): + trial = study.ask(fixed_distributions=relative_search_space) + study.tell(trial, 1.0) + for param_name in relative_search_space.keys(): + assert trial.params[param_name] == 1 + + +@parametrize_sampler +@parametrize_suggest_method("x") +def test_single_parameter_objective( + sampler_class: Callable[[], BaseSampler], suggest_method_x: Callable[[Trial], float] +) -> None: + def objective(trial: Trial) -> float: + return suggest_method_x(trial) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = sampler_class() + + study = optuna.study.create_study(sampler=sampler) + study.optimize(objective, n_trials=10) + + assert len(study.trials) == 10 + assert all(t.state == TrialState.COMPLETE for t in study.trials) + + +@parametrize_sampler +def test_conditional_parameter_objective(sampler_class: Callable[[], BaseSampler]) -> None: + def objective(trial: Trial) -> float: + x = trial.suggest_categorical("x", [True, False]) + if x: + return trial.suggest_float("y", 0, 1) + return trial.suggest_float("z", 0, 1) + + with warnings.catch_warnings(): + 
warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = sampler_class() + + study = optuna.study.create_study(sampler=sampler) + study.optimize(objective, n_trials=10) + + assert len(study.trials) == 10 + assert all(t.state == TrialState.COMPLETE for t in study.trials) + + +@parametrize_sampler +@parametrize_suggest_method("x") +@parametrize_suggest_method("y") +def test_combination_of_different_distributions_objective( + sampler_class: Callable[[], BaseSampler], + suggest_method_x: Callable[[Trial], float], + suggest_method_y: Callable[[Trial], float], +) -> None: + def objective(trial: Trial) -> float: + return suggest_method_x(trial) + suggest_method_y(trial) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = sampler_class() + + study = optuna.study.create_study(sampler=sampler) + study.optimize(objective, n_trials=3) + + assert len(study.trials) == 3 + assert all(t.state == TrialState.COMPLETE for t in study.trials) + + +@parametrize_sampler +@pytest.mark.parametrize( + "second_low,second_high", + [ + (0, 5), # Narrow range. + (0, 20), # Expand range. + (20, 30), # Set non-overlapping range. 
+ ], +) +def test_dynamic_range_objective( + sampler_class: Callable[[], BaseSampler], second_low: int, second_high: int +) -> None: + def objective(trial: Trial, low: int, high: int) -> float: + v = trial.suggest_float("x", low, high) + v += trial.suggest_int("y", low, high) + return v + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) + sampler = sampler_class() + + study = optuna.study.create_study(sampler=sampler) + study.optimize(lambda t: objective(t, 0, 10), n_trials=10) + study.optimize(lambda t: objective(t, second_low, second_high), n_trials=10) + + assert len(study.trials) == 20 + assert all(t.state == TrialState.COMPLETE for t in study.trials) + + +# We add tests for constant objective functions to ensure the reproducibility of sorting. +@parametrize_sampler_with_seed +@pytest.mark.slow +@pytest.mark.parametrize("objective_func", [lambda *args: sum(args), lambda *args: 0.0]) +def test_reproducible(sampler_class: Callable[[int], BaseSampler], objective_func: Any) -> None: + def objective(trial: Trial) -> float: + a = trial.suggest_float("a", 1, 9) + b = trial.suggest_float("b", 1, 9, log=True) + c = trial.suggest_float("c", 1, 9, step=1) + d = trial.suggest_int("d", 1, 9) + e = trial.suggest_int("e", 1, 9, log=True) + f = trial.suggest_int("f", 1, 9, step=2) + g = trial.suggest_categorical("g", range(1, 10)) + return objective_func(a, b, c, d, e, f, g) + + study = optuna.create_study(sampler=sampler_class(1)) + study.optimize(objective, n_trials=15) + + study_same_seed = optuna.create_study(sampler=sampler_class(1)) + study_same_seed.optimize(objective, n_trials=15) + for i in range(15): + assert study.trials[i].params == study_same_seed.trials[i].params + + study_different_seed = optuna.create_study(sampler=sampler_class(2)) + study_different_seed.optimize(objective, n_trials=15) + assert any( + [study.trials[i].params != study_different_seed.trials[i].params for i in range(15)] + ) + + 
+@pytest.mark.slow +@parametrize_sampler_with_seed +def test_reseed_rng_change_sampling(sampler_class: Callable[[int], BaseSampler]) -> None: + def objective(trial: Trial) -> float: + a = trial.suggest_float("a", 1, 9) + b = trial.suggest_float("b", 1, 9, log=True) + c = trial.suggest_float("c", 1, 9, step=1) + d = trial.suggest_int("d", 1, 9) + e = trial.suggest_int("e", 1, 9, log=True) + f = trial.suggest_int("f", 1, 9, step=2) + g = trial.suggest_categorical("g", range(1, 10)) + return a + b + c + d + e + f + g + + sampler = sampler_class(1) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=15) + + sampler_different_seed = sampler_class(1) + sampler_different_seed.reseed_rng() + study_different_seed = optuna.create_study(sampler=sampler_different_seed) + study_different_seed.optimize(objective, n_trials=15) + assert any( + [study.trials[i].params != study_different_seed.trials[i].params for i in range(15)] + ) + + +# This function is used only in test_reproducible_in_other_process, but declared at top-level +# because local function cannot be pickled, which occurs within multiprocessing. 
+def run_optimize( + k: int, + sampler_name: str, + sequence_dict: DictProxy, + hash_dict: DictProxy, +) -> None: + def objective(trial: Trial) -> float: + a = trial.suggest_float("a", 1, 9) + b = trial.suggest_float("b", 1, 9, log=True) + c = trial.suggest_float("c", 1, 9, step=1) + d = trial.suggest_int("d", 1, 9) + e = trial.suggest_int("e", 1, 9, log=True) + f = trial.suggest_int("f", 1, 9, step=2) + g = trial.suggest_categorical("g", range(1, 10)) + return a + b + c + d + e + f + g + + hash_dict[k] = hash("nondeterministic hash") + sampler = sampler_class_with_seed[sampler_name][0](1) + study = optuna.create_study(sampler=sampler) + study.optimize(objective, n_trials=15) + sequence_dict[k] = list(study.trials[-1].params.values()) + + +@pytest.fixture +def unset_seed_in_test(request: SubRequest) -> None: + # Unset the hashseed at beginning and restore it at end regardless of an exception in the test. + # See https://docs.pytest.org/en/stable/how-to/fixtures.html#adding-finalizers-directly + # for details. + + hash_seed = os.getenv("PYTHONHASHSEED") + if hash_seed is not None: + del os.environ["PYTHONHASHSEED"] + + def restore_seed() -> None: + if hash_seed is not None: + os.environ["PYTHONHASHSEED"] = hash_seed + + request.addfinalizer(restore_seed) + + +@pytest.mark.slow +@parametrize_sampler_name_with_seed +def test_reproducible_in_other_process(sampler_name: str, unset_seed_in_test: None) -> None: + # This test should be tested without `PYTHONHASHSEED`. However, some tool such as tox + # set the environmental variable "PYTHONHASHSEED" by default. + # To do so, this test calls a finalizer: `unset_seed_in_test`. + + # Multiprocessing supports three way to start a process. + # We use `spawn` option to create a child process as a fresh python process. + # For more detail, see https://github.com/optuna/optuna/pull/3187#issuecomment-997673037. 
+ multiprocessing.set_start_method("spawn", force=True) + manager = multiprocessing.Manager() + sequence_dict: DictProxy = manager.dict() + hash_dict: DictProxy = manager.dict() + for i in range(3): + p = multiprocessing.Process( + target=run_optimize, args=(i, sampler_name, sequence_dict, hash_dict) + ) + p.start() + p.join() + + # Hashes are expected to be different because string hashing is nondeterministic per process. + assert not (hash_dict[0] == hash_dict[1] == hash_dict[2]) + # But the sequences are expected to be the same. + assert sequence_dict[0] == sequence_dict[1] == sequence_dict[2] + + +@pytest.mark.parametrize("n_jobs", [1, 2]) +@parametrize_relative_sampler +def test_cache_is_invalidated( + n_jobs: int, relative_sampler_class: Callable[[], BaseSampler] +) -> None: + sampler = relative_sampler_class() + study = optuna.study.create_study(sampler=sampler) + + def objective(trial: Trial) -> float: + assert trial._relative_params is None + assert study._thread_local.cached_all_trials is None + + trial.suggest_float("x", -10, 10) + trial.suggest_float("y", -10, 10) + assert trial._relative_params is not None + return -1 + + study.optimize(objective, n_trials=10, n_jobs=n_jobs) From b39adf0d7841339b302d1c5bfd7e48a6b4564821 Mon Sep 17 00:00:00 2001 From: y0z Date: Tue, 6 Feb 2024 19:34:44 +0900 Subject: [PATCH 4/7] Delete unnecessary files. 
--- optuna/integration/botorch.py | 986 ------------------------ tests/integration_tests/test_botorch.py | 559 -------------- 2 files changed, 1545 deletions(-) delete mode 100644 optuna/integration/botorch.py delete mode 100644 tests/integration_tests/test_botorch.py diff --git a/optuna/integration/botorch.py b/optuna/integration/botorch.py deleted file mode 100644 index 78746348..00000000 --- a/optuna/integration/botorch.py +++ /dev/null @@ -1,986 +0,0 @@ -from typing import Any -from typing import Callable -from typing import Dict -from typing import Optional -from typing import Sequence -from typing import Union -import warnings - -import numpy -from packaging import version - -from optuna import logging -from optuna._experimental import experimental_class -from optuna._experimental import experimental_func -from optuna._imports import try_import -from optuna._transform import _SearchSpaceTransform -from optuna.distributions import BaseDistribution -from optuna.samplers import BaseSampler -from optuna.samplers import RandomSampler -from optuna.samplers._base import _CONSTRAINTS_KEY -from optuna.samplers._base import _process_constraints_after_trial -from optuna.search_space import IntersectionSearchSpace -from optuna.study import Study -from optuna.study import StudyDirection -from optuna.trial import FrozenTrial -from optuna.trial import TrialState - - -with try_import() as _imports: - from botorch.acquisition.monte_carlo import qExpectedImprovement - from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement - from botorch.acquisition.multi_objective import monte_carlo - from botorch.acquisition.multi_objective.analytic import ExpectedHypervolumeImprovement - from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective - from botorch.acquisition.objective import ConstrainedMCObjective - from botorch.acquisition.objective import GenericMCObjective - from botorch.models import SingleTaskGP - from 
botorch.models.transforms.outcome import Standardize - from botorch.optim import optimize_acqf - from botorch.sampling import SobolQMCNormalSampler - import botorch.version - - if version.parse(botorch.version.version) < version.parse("0.8.0"): - from botorch.fit import fit_gpytorch_model as fit_gpytorch_mll - - def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: - return SobolQMCNormalSampler(num_samples) - - else: - from botorch.fit import fit_gpytorch_mll - - def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: - return SobolQMCNormalSampler(torch.Size((num_samples,))) - - from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning - from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization - from botorch.utils.sampling import manual_seed - from botorch.utils.sampling import sample_simplex - from botorch.utils.transforms import normalize - from botorch.utils.transforms import unnormalize - from gpytorch.mlls import ExactMarginalLogLikelihood - import torch - - -_logger = logging.get_logger(__name__) - -with try_import() as _imports_logei: - from botorch.acquisition.analytic import LogConstrainedExpectedImprovement - from botorch.acquisition.analytic import LogExpectedImprovement - - -@experimental_func("3.3.0") -def logei_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Log Expected Improvement (LogEI). - - The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` - with single-objective optimization. - - Args: - train_x: - Previous parameter configurations. A ``torch.Tensor`` of shape - ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials - and ``n_params`` is the number of parameters. 
``n_params`` may be larger than the - actual number of parameters if categorical parameters are included in the search - space, since these parameters are one-hot encoded. - Values are not normalized. - train_obj: - Previously observed objectives. A ``torch.Tensor`` of shape - ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``. - ``n_objectives`` is the number of objectives. Observations are not normalized. - train_con: - Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``. - ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of - constraints. A constraint is violated if strictly larger than 0. If no constraints are - involved in the optimization, this argument will be :obj:`None`. - bounds: - Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is - identical to that of ``train_x``. The first and the second rows correspond to the - lower and upper bounds for each parameter respectively. - pending_x: - Pending parameter configurations. A ``torch.Tensor`` of shape - ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already - suggested all their parameters but have not completed their evaluation, and - ``n_params`` is identical to that of ``train_x``. - - Returns: - Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``. - - """ - - # We need botorch >=0.8.1 for LogExpectedImprovement. - if not _imports_logei.is_successful(): - raise ImportError( - "logei_candidates_func requires botorch >=0.8.1. " - "Please upgrade botorch or use qei_candidates_func as candidates_func instead." 
- ) - - if train_obj.size(-1) != 1: - raise ValueError("Objective may only contain single values with logEI.") - n_constraints = train_con.size(1) if train_con is not None else 0 - if n_constraints > 0: - assert train_con is not None - train_y = torch.cat([train_obj, train_con], dim=-1) - - is_feas = (train_con <= 0).all(dim=-1) - train_obj_feas = train_obj[is_feas] - - if train_obj_feas.numel() == 0: - _logger.warning( - "No objective values are feasible. Using 0 as the best objective in logEI." - ) - best_f = train_obj.min() - else: - best_f = train_obj_feas.max() - - else: - train_y = train_obj - best_f = train_obj.max() - - train_x = normalize(train_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - if n_constraints > 0: - acqf = LogConstrainedExpectedImprovement( - model=model, - best_f=best_f, - objective_index=0, - constraints={i: (None, 0.0) for i in range(1, n_constraints + 1)}, - ) - else: - acqf = LogExpectedImprovement( - model=model, - best_f=best_f, - ) - - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=10, - raw_samples=512, - options={"batch_limit": 5, "maxiter": 200}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("2.4.0") -def qei_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Quasi MC-based batch Expected Improvement (qEI). - - Args: - train_x: - Previous parameter configurations. A ``torch.Tensor`` of shape - ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials - and ``n_params`` is the number of parameters. 
``n_params`` may be larger than the - actual number of parameters if categorical parameters are included in the search - space, since these parameters are one-hot encoded. - Values are not normalized. - train_obj: - Previously observed objectives. A ``torch.Tensor`` of shape - ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``. - ``n_objectives`` is the number of objectives. Observations are not normalized. - train_con: - Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``. - ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of - constraints. A constraint is violated if strictly larger than 0. If no constraints are - involved in the optimization, this argument will be :obj:`None`. - bounds: - Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is - identical to that of ``train_x``. The first and the second rows correspond to the - lower and upper bounds for each parameter respectively. - pending_x: - Pending parameter configurations. A ``torch.Tensor`` of shape - ``(n_pending, n_params)``. ``n_pending`` is the number of the trials which are already - suggested all their parameters but have not completed their evaluation, and - ``n_params`` is identical to that of ``train_x``. - Returns: - Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``. - - """ - - if train_obj.size(-1) != 1: - raise ValueError("Objective may only contain single values with qEI.") - if train_con is not None: - train_y = torch.cat([train_obj, train_con], dim=-1) - - is_feas = (train_con <= 0).all(dim=-1) - train_obj_feas = train_obj[is_feas] - - if train_obj_feas.numel() == 0: - # TODO(hvy): Do not use 0 as the best observation. - _logger.warning( - "No objective values are feasible. Using 0 as the best objective in qEI." 
- ) - best_f = torch.zeros(()) - else: - best_f = train_obj_feas.max() - - n_constraints = train_con.size(1) - objective = ConstrainedMCObjective( - objective=lambda Z: Z[..., 0], - constraints=[ - (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) - ], - ) - else: - train_y = train_obj - - best_f = train_obj.max() - - objective = None # Using the default identity objective. - - train_x = normalize(train_x, bounds=bounds) - if pending_x is not None: - pending_x = normalize(pending_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - acqf = qExpectedImprovement( - model=model, - best_f=best_f, - sampler=_get_sobol_qmc_normal_sampler(256), - objective=objective, - X_pending=pending_x, - ) - - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=10, - raw_samples=512, - options={"batch_limit": 5, "maxiter": 200}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("3.3.0") -def qnei_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Quasi MC-based batch Noisy Expected Improvement (qNEI). - - This function may perform better than qEI (`qei_candidates_func`) when - the evaluated values of objective function are noisy. - - .. seealso:: - :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value - descriptions. 
- """ - if train_obj.size(-1) != 1: - raise ValueError("Objective may only contain single values with qNEI.") - if train_con is not None: - train_y = torch.cat([train_obj, train_con], dim=-1) - - n_constraints = train_con.size(1) - objective = ConstrainedMCObjective( - objective=lambda Z: Z[..., 0], - constraints=[ - (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) - ], - ) - else: - train_y = train_obj - - objective = None # Using the default identity objective. - - train_x = normalize(train_x, bounds=bounds) - if pending_x is not None: - pending_x = normalize(pending_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - acqf = qNoisyExpectedImprovement( - model=model, - X_baseline=train_x, - sampler=_get_sobol_qmc_normal_sampler(256), - objective=objective, - X_pending=pending_x, - ) - - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=10, - raw_samples=512, - options={"batch_limit": 5, "maxiter": 200}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("2.4.0") -def qehvi_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI). - - The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` - with multi-objective optimization when the number of objectives is three or less. - - .. seealso:: - :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value - descriptions. 
- """ - - n_objectives = train_obj.size(-1) - - if train_con is not None: - train_y = torch.cat([train_obj, train_con], dim=-1) - - is_feas = (train_con <= 0).all(dim=-1) - train_obj_feas = train_obj[is_feas] - - n_constraints = train_con.size(1) - additional_qehvi_kwargs = { - "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))), - "constraints": [ - (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) - ], - } - else: - train_y = train_obj - - train_obj_feas = train_obj - - additional_qehvi_kwargs = {} - - train_x = normalize(train_x, bounds=bounds) - if pending_x is not None: - pending_x = normalize(pending_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - # Approximate box decomposition similar to Ax when the number of objectives is large. - # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 - if n_objectives > 4: - alpha = 10 ** (-8 + n_objectives) - else: - alpha = 0.0 - - ref_point = train_obj.min(dim=0).values - 1e-8 - - partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_obj_feas, alpha=alpha) - - ref_point_list = ref_point.tolist() - - acqf = monte_carlo.qExpectedHypervolumeImprovement( - model=model, - ref_point=ref_point_list, - partitioning=partitioning, - sampler=_get_sobol_qmc_normal_sampler(256), - X_pending=pending_x, - **additional_qehvi_kwargs, - ) - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=20, - raw_samples=1024, - options={"batch_limit": 5, "maxiter": 200, "nonnegative": True}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("3.5.0") 
-def ehvi_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Expected Hypervolume Improvement (EHVI). - - The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` - with multi-objective optimization without constraints. - - .. seealso:: - :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value - descriptions. - """ - - n_objectives = train_obj.size(-1) - if train_con is not None: - raise ValueError("Constraints are not supported with ehvi_candidates_func.") - - train_y = train_obj - train_x = normalize(train_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - # Approximate box decomposition similar to Ax when the number of objectives is large. 
- # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 - if n_objectives > 4: - alpha = 10 ** (-8 + n_objectives) - else: - alpha = 0.0 - - ref_point = train_obj.min(dim=0).values - 1e-8 - - partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_y, alpha=alpha) - - ref_point_list = ref_point.tolist() - - acqf = ExpectedHypervolumeImprovement( - model=model, - ref_point=ref_point_list, - partitioning=partitioning, - ) - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=20, - raw_samples=1024, - options={"batch_limit": 5, "maxiter": 200}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("3.1.0") -def qnehvi_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Quasi MC-based batch Noisy Expected Hypervolume Improvement (qNEHVI). - - According to Botorch/Ax documentation, - this function may perform better than qEHVI (`qehvi_candidates_func`). - (cf. https://botorch.org/tutorials/constrained_multi_objective_bo ) - - .. seealso:: - :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value - descriptions. 
- """ - - n_objectives = train_obj.size(-1) - - if train_con is not None: - train_y = torch.cat([train_obj, train_con], dim=-1) - - n_constraints = train_con.size(1) - additional_qnehvi_kwargs = { - "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))), - "constraints": [ - (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) - ], - } - else: - train_y = train_obj - - additional_qnehvi_kwargs = {} - - train_x = normalize(train_x, bounds=bounds) - if pending_x is not None: - pending_x = normalize(pending_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - # Approximate box decomposition similar to Ax when the number of objectives is large. - # https://github.com/pytorch/botorch/blob/36d09a4297c2a0ff385077b7fcdd5a9d308e40cc/botorch/acquisition/multi_objective/utils.py#L46-L63 - if n_objectives > 4: - alpha = 10 ** (-8 + n_objectives) - else: - alpha = 0.0 - - ref_point = train_obj.min(dim=0).values - 1e-8 - - ref_point_list = ref_point.tolist() - - # prune_baseline=True is generally recommended by the documentation of BoTorch. - # cf. 
https://botorch.org/api/acquisition.html (accessed on 2022/11/18) - acqf = monte_carlo.qNoisyExpectedHypervolumeImprovement( - model=model, - ref_point=ref_point_list, - X_baseline=train_x, - alpha=alpha, - prune_baseline=True, - sampler=_get_sobol_qmc_normal_sampler(256), - X_pending=pending_x, - **additional_qnehvi_kwargs, - ) - - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=20, - raw_samples=1024, - options={"batch_limit": 5, "maxiter": 200, "nonnegative": True}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -@experimental_func("2.4.0") -def qparego_candidates_func( - train_x: "torch.Tensor", - train_obj: "torch.Tensor", - train_con: Optional["torch.Tensor"], - bounds: "torch.Tensor", - pending_x: Optional["torch.Tensor"], -) -> "torch.Tensor": - """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization. - - The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` - with multi-objective optimization when the number of objectives is larger than three. - - .. seealso:: - :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value - descriptions. 
- """ - - n_objectives = train_obj.size(-1) - - weights = sample_simplex(n_objectives).squeeze() - scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj) - - if train_con is not None: - train_y = torch.cat([train_obj, train_con], dim=-1) - n_constraints = train_con.size(1) - objective = ConstrainedMCObjective( - objective=lambda Z: scalarization(Z[..., :n_objectives]), - constraints=[ - (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints) - ], - ) - else: - train_y = train_obj - - objective = GenericMCObjective(scalarization) - - train_x = normalize(train_x, bounds=bounds) - if pending_x is not None: - pending_x = normalize(pending_x, bounds=bounds) - - model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll) - - acqf = qExpectedImprovement( - model=model, - best_f=objective(train_y).max(), - sampler=_get_sobol_qmc_normal_sampler(256), - objective=objective, - X_pending=pending_x, - ) - - standard_bounds = torch.zeros_like(bounds) - standard_bounds[1] = 1 - - candidates, _ = optimize_acqf( - acq_function=acqf, - bounds=standard_bounds, - q=1, - num_restarts=20, - raw_samples=1024, - options={"batch_limit": 5, "maxiter": 200}, - sequential=True, - ) - - candidates = unnormalize(candidates.detach(), bounds=bounds) - - return candidates - - -def _get_default_candidates_func( - n_objectives: int, - has_constraint: bool, - consider_running_trials: bool, -) -> Callable[ - [ - "torch.Tensor", - "torch.Tensor", - Optional["torch.Tensor"], - "torch.Tensor", - Optional["torch.Tensor"], - ], - "torch.Tensor", -]: - if n_objectives > 3 and not has_constraint and not consider_running_trials: - return ehvi_candidates_func - elif n_objectives > 3: - return qparego_candidates_func - elif n_objectives > 1: - return qehvi_candidates_func - elif consider_running_trials: - return qei_candidates_func - else: - return 
logei_candidates_func - - -@experimental_class("2.4.0") -class BoTorchSampler(BaseSampler): - """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch. - - This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter - configurations. Parameters are transformed to continuous space and passed to BoTorch, and then - transformed back to Optuna's representations. Categorical parameters are one-hot encoded. - - .. seealso:: - See an `example `_ how to use the sampler. - - .. seealso:: - See the `BoTorch `_ homepage for details and for how to implement - your own ``candidates_func``. - - .. note:: - An instance of this sampler *should not be used with different studies* when used with - constraints. Instead, a new instance should be created for each new study. The reason for - this is that the sampler is stateful keeping all the computed constraints. - - Args: - candidates_func: - An optional function that suggests the next candidates. It must take the training - data, the objectives, the constraints, the search space bounds and return the next - candidates. The arguments are of type ``torch.Tensor``. The return value must be a - ``torch.Tensor``. However, if ``constraints_func`` is omitted, constraints will be - :obj:`None`. For any constraints that failed to compute, the tensor will contain - NaN. - - If omitted, it is determined automatically based on the number of objectives and - whether a constraint is specified. If the - number of objectives is one and no constraint is specified, log-Expected Improvement - is used. If constraints are specified, quasi MC-based batch Expected Improvement - (qEI) is used. - If the number of objectives is either two or three, Quasi MC-based - batch Expected Hypervolume Improvement (qEHVI) is used. 
Otherwise, for a larger number - of objectives, analytic Expected Hypervolume Improvement is used if no constraints - are specified, or the faster Quasi MC-based extended ParEGO (qParEGO) is used if - constraints are present. - - The function should assume *maximization* of the objective. - - .. seealso:: - See :func:`optuna.integration.botorch.qei_candidates_func` for an example. - constraints_func: - An optional function that computes the objective constraints. It must take a - :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must - be a sequence of :obj:`float` s. A value strictly larger than 0 means that a - constraint is violated. A value equal to or smaller than 0 is considered feasible. - - If omitted, no constraints will be passed to ``candidates_func`` nor taken into - account during suggestion. - n_startup_trials: - Number of initial trials, that is the number of trials to resort to independent - sampling. - consider_running_trials: - If True, the acquisition function takes into consideration the running parameters - whose evaluation has not completed. Enabling this option is considered to improve the - performance of parallel optimization. - - .. note:: - Added in v3.2.0 as an experimental argument. - independent_sampler: - An independent sampler to use for the initial trials and for parameters that are - conditional. - seed: - Seed for random number generator. - device: - A ``torch.device`` to store input and output data of BoTorch. Please set a CUDA device - if you fasten sampling. 
- """ - - def __init__( - self, - *, - candidates_func: Optional[ - Callable[ - [ - "torch.Tensor", - "torch.Tensor", - Optional["torch.Tensor"], - "torch.Tensor", - Optional["torch.Tensor"], - ], - "torch.Tensor", - ] - ] = None, - constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None, - n_startup_trials: int = 10, - consider_running_trials: bool = False, - independent_sampler: Optional[BaseSampler] = None, - seed: Optional[int] = None, - device: Optional["torch.device"] = None, - ): - _imports.check() - - self._candidates_func = candidates_func - self._constraints_func = constraints_func - self._consider_running_trials = consider_running_trials - self._independent_sampler = independent_sampler or RandomSampler(seed=seed) - self._n_startup_trials = n_startup_trials - self._seed = seed - - self._study_id: Optional[int] = None - self._search_space = IntersectionSearchSpace() - self._device = device or torch.device("cpu") - - def infer_relative_search_space( - self, - study: Study, - trial: FrozenTrial, - ) -> Dict[str, BaseDistribution]: - if self._study_id is None: - self._study_id = study._study_id - if self._study_id != study._study_id: - # Note that the check below is meaningless when `InMemoryStorage` is used - # because `InMemoryStorage.create_new_study` always returns the same study ID. - raise RuntimeError("BoTorchSampler cannot handle multiple studies.") - - search_space: Dict[str, BaseDistribution] = {} - for name, distribution in self._search_space.calculate(study).items(): - if distribution.single(): - # built-in `candidates_func` cannot handle distributions that contain just a - # single value, so we skip them. Note that the parameter values for such - # distributions are sampled in `Trial`. 
- continue - search_space[name] = distribution - - return search_space - - def sample_relative( - self, - study: Study, - trial: FrozenTrial, - search_space: Dict[str, BaseDistribution], - ) -> Dict[str, Any]: - assert isinstance(search_space, dict) - - if len(search_space) == 0: - return {} - - completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)) - running_trials = [ - t for t in study.get_trials(deepcopy=False, states=(TrialState.RUNNING,)) if t != trial - ] - trials = completed_trials + running_trials - - n_trials = len(trials) - n_completed_trials = len(completed_trials) - if n_trials < self._n_startup_trials: - return {} - - trans = _SearchSpaceTransform(search_space) - n_objectives = len(study.directions) - values: Union[numpy.ndarray, torch.Tensor] = numpy.empty( - (n_trials, n_objectives), dtype=numpy.float64 - ) - params: Union[numpy.ndarray, torch.Tensor] - con: Optional[Union[numpy.ndarray, torch.Tensor]] = None - bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds - params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64) - for trial_idx, trial in enumerate(trials): - if trial.state == TrialState.COMPLETE: - params[trial_idx] = trans.transform(trial.params) - assert len(study.directions) == len(trial.values) - for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)): - assert value is not None - if ( - direction == StudyDirection.MINIMIZE - ): # BoTorch always assumes maximization. 
- value *= -1 - values[trial_idx, obj_idx] = value - if self._constraints_func is not None: - constraints = study._storage.get_trial_system_attrs(trial._trial_id).get( - _CONSTRAINTS_KEY - ) - if constraints is not None: - n_constraints = len(constraints) - - if con is None: - con = numpy.full( - (n_completed_trials, n_constraints), numpy.nan, dtype=numpy.float64 - ) - elif n_constraints != con.shape[1]: - raise RuntimeError( - f"Expected {con.shape[1]} constraints " - f"but received {n_constraints}." - ) - con[trial_idx] = constraints - elif trial.state == TrialState.RUNNING: - if all(p in trial.params for p in search_space): - params[trial_idx] = trans.transform(trial.params) - else: - params[trial_idx] = numpy.nan - else: - assert False, "trail.state must be TrialState.COMPLETE or TrialState.RUNNING." - - if self._constraints_func is not None: - if con is None: - warnings.warn( - "`constraints_func` was given but no call to it correctly computed " - "constraints. Constraints passed to `candidates_func` will be `None`." - ) - elif numpy.isnan(con).any(): - warnings.warn( - "`constraints_func` was given but some calls to it did not correctly compute " - "constraints. Constraints passed to `candidates_func` will contain NaN." 
- ) - - values = torch.from_numpy(values).to(self._device) - params = torch.from_numpy(params).to(self._device) - if con is not None: - con = torch.from_numpy(con).to(self._device) - bounds = torch.from_numpy(bounds).to(self._device) - - if con is not None: - if con.dim() == 1: - con.unsqueeze_(-1) - bounds.transpose_(0, 1) - - if self._candidates_func is None: - self._candidates_func = _get_default_candidates_func( - n_objectives=n_objectives, - has_constraint=con is not None, - consider_running_trials=self._consider_running_trials, - ) - - completed_values = values[:n_completed_trials] - completed_params = params[:n_completed_trials] - if self._consider_running_trials: - running_params = params[n_completed_trials:] - running_params = running_params[~torch.isnan(running_params).any(dim=1)] - else: - running_params = None - - with manual_seed(self._seed): - # `manual_seed` makes the default candidates functions reproducible. - # `SobolQMCNormalSampler`'s constructor has a `seed` argument, but its behavior is - # deterministic when the BoTorch's seed is fixed. - candidates = self._candidates_func( - completed_params, completed_values, con, bounds, running_params - ) - if self._seed is not None: - self._seed += 1 - - if not isinstance(candidates, torch.Tensor): - raise TypeError("Candidates must be a torch.Tensor.") - if candidates.dim() == 2: - if candidates.size(0) != 1: - raise ValueError( - "Candidates batch optimization is not supported and the first dimension must " - "have size 1 if candidates is a two-dimensional tensor. Actual: " - f"{candidates.size()}." - ) - # Batch size is one. Get rid of the batch dimension. - candidates = candidates.squeeze(0) - if candidates.dim() != 1: - raise ValueError("Candidates must be one or two-dimensional.") - if candidates.size(0) != bounds.size(1): - raise ValueError( - "Candidates size must match with the given bounds. Actual candidates: " - f"{candidates.size(0)}, bounds: {bounds.size(1)}." 
- ) - - return trans.untransform(candidates.cpu().numpy()) - - def sample_independent( - self, - study: Study, - trial: FrozenTrial, - param_name: str, - param_distribution: BaseDistribution, - ) -> Any: - return self._independent_sampler.sample_independent( - study, trial, param_name, param_distribution - ) - - def reseed_rng(self) -> None: - self._independent_sampler.reseed_rng() - if self._seed is not None: - self._seed = numpy.random.RandomState().randint(numpy.iinfo(numpy.int32).max) - - def before_trial(self, study: Study, trial: FrozenTrial) -> None: - self._independent_sampler.before_trial(study, trial) - - def after_trial( - self, - study: Study, - trial: FrozenTrial, - state: TrialState, - values: Optional[Sequence[float]], - ) -> None: - if self._constraints_func is not None: - _process_constraints_after_trial(self._constraints_func, study, trial, state) - self._independent_sampler.after_trial(study, trial, state, values) diff --git a/tests/integration_tests/test_botorch.py b/tests/integration_tests/test_botorch.py deleted file mode 100644 index 439c84ec..00000000 --- a/tests/integration_tests/test_botorch.py +++ /dev/null @@ -1,559 +0,0 @@ -from typing import Any -from typing import Optional -from typing import Sequence -from typing import Tuple -from unittest.mock import patch -import warnings - -from packaging import version -import pytest - -import optuna -from optuna import integration -from optuna._imports import try_import -from optuna.integration import BoTorchSampler -from optuna.samplers import RandomSampler -from optuna.samplers._base import _CONSTRAINTS_KEY -from optuna.storages import RDBStorage -from optuna.trial import FrozenTrial -from optuna.trial import Trial -from optuna.trial import TrialState - - -with try_import() as _imports: - import botorch - import torch - -if not _imports.is_successful(): - from unittest.mock import MagicMock - - torch = MagicMock() # NOQA - -pytestmark = pytest.mark.integration - - 
-@pytest.mark.parametrize("n_objectives", [1, 2, 4]) -def test_botorch_candidates_func_none(n_objectives: int) -> None: - if n_objectives == 1 and version.parse(botorch.version.version) < version.parse("0.8.1"): - pytest.skip("botorch >=0.8.1 is required for logei_candidates_func.") - - n_trials = 3 - n_startup_trials = 2 - - sampler = BoTorchSampler(n_startup_trials=n_startup_trials) - - study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) - study.optimize( - lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials - ) - - assert len(study.trials) == n_trials - - # TODO(hvy): Do not check for the correct candidates function using private APIs. - if n_objectives == 1: - assert sampler._candidates_func is integration.botorch.logei_candidates_func - elif n_objectives == 2: - assert sampler._candidates_func is integration.botorch.qehvi_candidates_func - elif n_objectives == 4: - assert sampler._candidates_func is integration.botorch.ehvi_candidates_func - else: - assert False, "Should not reach." 
- - -def test_botorch_candidates_func() -> None: - candidates_func_call_count = 0 - - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - assert train_con is None - - candidates = torch.rand(1) - - nonlocal candidates_func_call_count - candidates_func_call_count += 1 - - return candidates - - n_trials = 3 - n_startup_trials = 1 - - sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=n_startup_trials) - - study = optuna.create_study(direction="minimize", sampler=sampler) - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=n_trials) - - assert len(study.trials) == n_trials - assert candidates_func_call_count == n_trials - n_startup_trials - - -@pytest.mark.parametrize( - "candidates_func, n_objectives", - [ - (integration.botorch.ehvi_candidates_func, 4), - (integration.botorch.ehvi_candidates_func, 5), # alpha > 0 - (integration.botorch.logei_candidates_func, 1), - (integration.botorch.qei_candidates_func, 1), - (integration.botorch.qnei_candidates_func, 1), - (integration.botorch.qehvi_candidates_func, 2), - (integration.botorch.qehvi_candidates_func, 7), # alpha > 0 - (integration.botorch.qparego_candidates_func, 4), - (integration.botorch.qnehvi_candidates_func, 2), - (integration.botorch.qnehvi_candidates_func, 6), # alpha > 0 - ], -) -def test_botorch_specify_candidates_func(candidates_func: Any, n_objectives: int) -> None: - if candidates_func == integration.botorch.logei_candidates_func and version.parse( - botorch.version.version - ) < version.parse("0.8.1"): - pytest.skip("LogExpectedImprovement is not available in botorch <0.8.1.") - - n_trials = 4 - n_startup_trials = 2 - - sampler = BoTorchSampler( - candidates_func=candidates_func, - n_startup_trials=n_startup_trials, - ) - - study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) - study.optimize( - 
lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials - ) - - assert len(study.trials) == n_trials - - -@pytest.mark.parametrize( - "candidates_func, n_objectives", - [ - (integration.botorch.logei_candidates_func, 1), - (integration.botorch.qei_candidates_func, 1), - (integration.botorch.qehvi_candidates_func, 2), - (integration.botorch.qparego_candidates_func, 4), - (integration.botorch.qnehvi_candidates_func, 2), - (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 - ], -) -def test_botorch_specify_candidates_func_constrained( - candidates_func: Any, n_objectives: int -) -> None: - n_trials = 4 - n_startup_trials = 2 - constraints_func_call_count = 0 - - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - xs = sum(trial.params[f"x{i}"] for i in range(n_objectives)) - - nonlocal constraints_func_call_count - constraints_func_call_count += 1 - - return (xs - 0.5,) - - sampler = BoTorchSampler( - constraints_func=constraints_func, - candidates_func=candidates_func, - n_startup_trials=n_startup_trials, - ) - - study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) - study.optimize( - lambda t: [t.suggest_float(f"x{i}", 0, 1) for i in range(n_objectives)], n_trials=n_trials - ) - - assert len(study.trials) == n_trials - assert constraints_func_call_count == n_trials - - -def test_botorch_candidates_func_invalid_batch_size() -> None: - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - return torch.rand(2, 1) # Must have the batch size one, not two. 
- - sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(ValueError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - -def test_botorch_candidates_func_invalid_dimensionality() -> None: - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - return torch.rand(1, 1, 1) # Must have one or two dimensions, not three. - - sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(ValueError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - -def test_botorch_candidates_func_invalid_candidates_size() -> None: - n_params = 3 - - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - return torch.rand(n_params - 1) # Must return candidates for all parameters. - - sampler = BoTorchSampler(candidates_func=candidates_func, n_startup_trials=1) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(ValueError): - study.optimize( - lambda t: sum(t.suggest_float(f"x{i}", 0, 1) for i in range(n_params)), n_trials=3 - ) - - -def test_botorch_constraints_func_invalid_inconsistent_n_constraints() -> None: - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - x0 = trial.params["x0"] - return [x0 - 0.5] * trial.number # Number of constraints may not change. 
- - sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(RuntimeError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - -def test_botorch_constraints_func_raises() -> None: - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - if trial.number == 1: - raise RuntimeError - return (0.0,) - - sampler = BoTorchSampler(constraints_func=constraints_func) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(RuntimeError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - assert len(study.trials) == 2 - - for trial in study.trials: - sys_con = trial.system_attrs[_CONSTRAINTS_KEY] - - expected_sys_con: Optional[Tuple[int]] - - if trial.number == 0: - expected_sys_con = (0,) - elif trial.number == 1: - expected_sys_con = None - else: - assert False, "Should not reach." - - assert sys_con == expected_sys_con - - -def test_botorch_constraints_func_nan_warning() -> None: - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - if trial.number == 1: - raise RuntimeError - return (0.0,) - - last_trial_number_candidates_func = None - - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - trial_number = train_x.size(0) - - assert train_con is not None - - if trial_number > 0: - assert not train_con[0, :].isnan().any() - if trial_number > 1: - assert train_con[1, :].isnan().all() - if trial_number > 2: - assert not train_con[2, :].isnan().any() - - nonlocal last_trial_number_candidates_func - last_trial_number_candidates_func = trial_number - - return torch.rand(1) - - sampler = BoTorchSampler( - candidates_func=candidates_func, - constraints_func=constraints_func, - n_startup_trials=1, - ) - - study = 
optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(RuntimeError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) - - assert len(study.trials) == 2 - - # Warns when `train_con` contains NaN. - with pytest.warns(UserWarning): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) - - assert len(study.trials) == 4 - - assert last_trial_number_candidates_func == study.trials[-1].number - - -def test_botorch_constraints_func_none_warning() -> None: - candidates_func_call_count = 0 - - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - raise RuntimeError - - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - # `train_con` should be `None` if `constraints_func` always fails. - assert train_con is None - - nonlocal candidates_func_call_count - candidates_func_call_count += 1 - - return torch.rand(1) - - sampler = BoTorchSampler( - candidates_func=candidates_func, - constraints_func=constraints_func, - n_startup_trials=1, - ) - - study = optuna.create_study(direction="minimize", sampler=sampler) - - with pytest.raises(RuntimeError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=None) - - assert len(study.trials) == 1 - - # Warns when `train_con` becomes `None`. 
- with pytest.warns(UserWarning), pytest.raises(RuntimeError): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=1) - - assert len(study.trials) == 2 - - assert candidates_func_call_count == 1 - - -def test_botorch_constraints_func_late() -> None: - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - return (0,) - - last_trial_number_candidates_func = None - - def candidates_func( - train_x: torch.Tensor, - train_obj: torch.Tensor, - train_con: Optional[torch.Tensor], - bounds: torch.Tensor, - running_x: Optional[torch.Tensor], - ) -> torch.Tensor: - trial_number = train_x.size(0) - - if trial_number < 3: - assert train_con is None - if trial_number == 3: - assert train_con is not None - assert train_con[:2, :].isnan().all() - assert not train_con[2, :].isnan().any() - - nonlocal last_trial_number_candidates_func - last_trial_number_candidates_func = trial_number - - return torch.rand(1) - - sampler = BoTorchSampler( - candidates_func=candidates_func, - n_startup_trials=1, - ) - - study = optuna.create_study(direction="minimize", sampler=sampler) - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) - - assert len(study.trials) == 2 - - sampler = BoTorchSampler( - candidates_func=candidates_func, - constraints_func=constraints_func, - n_startup_trials=1, - ) - - study.sampler = sampler - - # Warns when `train_con` contains NaN. Should not raise but will with NaN for previous trials - # that were not computed with constraints. 
- with pytest.warns(UserWarning): - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=2) - - assert len(study.trials) == 4 - - assert last_trial_number_candidates_func == study.trials[-1].number - - -def test_botorch_n_startup_trials() -> None: - independent_sampler = RandomSampler() - sampler = BoTorchSampler(n_startup_trials=2, independent_sampler=independent_sampler) - study = optuna.create_study(directions=["minimize", "maximize"], sampler=sampler) - - with patch.object( - independent_sampler, "sample_independent", wraps=independent_sampler.sample_independent - ) as mock_independent, patch.object( - sampler, "sample_relative", wraps=sampler.sample_relative - ) as mock_relative: - study.optimize( - lambda t: [t.suggest_float("x0", 0, 1), t.suggest_float("x1", 0, 1)], n_trials=3 - ) - assert mock_independent.call_count == 4 # The objective function has two parameters. - assert mock_relative.call_count == 3 - - -def test_botorch_distributions() -> None: - def objective(trial: Trial) -> float: - x0 = trial.suggest_float("x0", 0, 1) - x1 = trial.suggest_float("x1", 0.1, 1, log=True) - x2 = trial.suggest_float("x2", 0, 1, step=0.1) - x3 = trial.suggest_int("x3", 0, 2) - x4 = trial.suggest_int("x4", 2, 4, log=True) - x5 = trial.suggest_int("x5", 0, 4, step=2) - x6 = trial.suggest_categorical("x6", [0.1, 0.2, 0.3]) - return x0 + x1 + x2 + x3 + x4 + x5 + x6 - - sampler = BoTorchSampler() - - study = optuna.create_study(direction="minimize", sampler=sampler) - study.optimize(objective, n_trials=3) - - assert len(study.trials) == 3 - - -def test_botorch_invalid_different_studies() -> None: - # Using the same sampler with different studies should yield an error since the sampler is - # stateful holding the computed constraints. Two studies are considered different if their - # IDs differ. - # We use the RDB storage since this check does not work for the in-memory storage where all - # study IDs are identically 0. 
- storage = RDBStorage("sqlite:///:memory:") - - sampler = BoTorchSampler() - - study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) - study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - other_study = optuna.create_study(direction="minimize", sampler=sampler, storage=storage) - with pytest.raises(RuntimeError): - other_study.optimize(lambda t: t.suggest_float("x0", 0, 1), n_trials=3) - - -def test_call_after_trial_of_independent_sampler() -> None: - independent_sampler = optuna.samplers.RandomSampler() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", optuna.exceptions.ExperimentalWarning) - sampler = BoTorchSampler(independent_sampler=independent_sampler) - study = optuna.create_study(sampler=sampler) - with patch.object( - independent_sampler, "after_trial", wraps=independent_sampler.after_trial - ) as mock_object: - study.optimize(lambda _: 1.0, n_trials=1) - assert mock_object.call_count == 1 - - -@pytest.mark.parametrize("device", [None, torch.device("cpu"), torch.device("cuda:0")]) -def test_device_argument(device: Optional[torch.device]) -> None: - sampler = BoTorchSampler(device=device) - if not torch.cuda.is_available() and sampler._device.type == "cuda": - pytest.skip(reason="GPU is unavailable.") - - def objective(trial: Trial) -> float: - return trial.suggest_float("x", 0.0, 1.0) - - def constraints_func(trial: FrozenTrial) -> Sequence[float]: - x0 = trial.params["x"] - return [x0 - 0.5] - - sampler = BoTorchSampler(constraints_func=constraints_func, n_startup_trials=1) - study = optuna.create_study(sampler=sampler) - study.optimize(objective, n_trials=3) - - -@pytest.mark.parametrize( - "candidates_func, n_objectives", - [ - (integration.botorch.qei_candidates_func, 1), - (integration.botorch.qehvi_candidates_func, 2), - (integration.botorch.qparego_candidates_func, 4), - (integration.botorch.qnehvi_candidates_func, 2), - (integration.botorch.qnehvi_candidates_func, 3), # alpha > 0 - ], 
-) -def test_botorch_consider_running_trials(candidates_func: Any, n_objectives: int) -> None: - sampler = BoTorchSampler( - candidates_func=candidates_func, - n_startup_trials=1, - consider_running_trials=True, - ) - - def objective(trial: Trial) -> Sequence[float]: - ret = [] - for i in range(n_objectives): - val = sum(trial.suggest_float(f"x{i}_{j}", 0, 1) for j in range(2)) - ret.append(val) - return ret - - study = optuna.create_study(directions=["minimize"] * n_objectives, sampler=sampler) - study.optimize(objective, n_trials=2) - assert len(study.trials) == 2 - - # fully suggested running trial - running_trial_full = study.ask() - _ = objective(running_trial_full) - study.optimize(objective, n_trials=1) - assert len(study.trials) == 4 - assert sum(t.state == TrialState.RUNNING for t in study.trials) == 1 - assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 3 - - # partially suggested running trial - running_trial_partial = study.ask() - for i in range(n_objectives): - running_trial_partial.suggest_float(f"x{i}_0", 0, 1) - study.optimize(objective, n_trials=1) - assert len(study.trials) == 6 - assert sum(t.state == TrialState.RUNNING for t in study.trials) == 2 - assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 4 - - # not suggested running trial - _ = study.ask() - study.optimize(objective, n_trials=1) - assert len(study.trials) == 8 - assert sum(t.state == TrialState.RUNNING for t in study.trials) == 3 - assert sum(t.state == TrialState.COMPLETE for t in study.trials) == 5 From e365ec6eccabf1261745dfc9c5d4ac70814bde03 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 7 Feb 2024 11:10:09 +0900 Subject: [PATCH 5/7] Apply isort. 
--- optuna_integration/botorch.py | 8 ++++---- tests/test_botorch.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/optuna_integration/botorch.py b/optuna_integration/botorch.py index 78746348..c707490a 100644 --- a/optuna_integration/botorch.py +++ b/optuna_integration/botorch.py @@ -7,8 +7,6 @@ import warnings import numpy -from packaging import version - from optuna import logging from optuna._experimental import experimental_class from optuna._experimental import experimental_func @@ -24,6 +22,7 @@ from optuna.study import StudyDirection from optuna.trial import FrozenTrial from optuna.trial import TrialState +from packaging import version with try_import() as _imports: @@ -52,14 +51,15 @@ def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: def _get_sobol_qmc_normal_sampler(num_samples: int) -> SobolQMCNormalSampler: return SobolQMCNormalSampler(torch.Size((num_samples,))) + from gpytorch.mlls import ExactMarginalLogLikelihood + import torch + from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization from botorch.utils.sampling import manual_seed from botorch.utils.sampling import sample_simplex from botorch.utils.transforms import normalize from botorch.utils.transforms import unnormalize - from gpytorch.mlls import ExactMarginalLogLikelihood - import torch _logger = logging.get_logger(__name__) diff --git a/tests/test_botorch.py b/tests/test_botorch.py index 439c84ec..ae21dcbb 100644 --- a/tests/test_botorch.py +++ b/tests/test_botorch.py @@ -5,9 +5,6 @@ from unittest.mock import patch import warnings -from packaging import version -import pytest - import optuna from optuna import integration from optuna._imports import try_import @@ -18,12 +15,15 @@ from optuna.trial import FrozenTrial from optuna.trial import Trial from optuna.trial import TrialState +from packaging import version +import 
pytest with try_import() as _imports: - import botorch import torch + import botorch + if not _imports.is_successful(): from unittest.mock import MagicMock From 6d190317824c4bd8a01c1a4d4cded2c5adfa9053 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 7 Feb 2024 14:35:07 +0900 Subject: [PATCH 6/7] Fix to use optuna_integration.botorch. --- tests/test_botorch.py | 5 +++-- tests/test_samplers.py | 17 ++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_botorch.py b/tests/test_botorch.py index ae21dcbb..4de749f0 100644 --- a/tests/test_botorch.py +++ b/tests/test_botorch.py @@ -6,9 +6,7 @@ import warnings import optuna -from optuna import integration from optuna._imports import try_import -from optuna.integration import BoTorchSampler from optuna.samplers import RandomSampler from optuna.samplers._base import _CONSTRAINTS_KEY from optuna.storages import RDBStorage @@ -18,6 +16,9 @@ from packaging import version import pytest +import optuna_integration as integration +from optuna_integration import BoTorchSampler + with try_import() as _imports: import torch diff --git a/tests/test_samplers.py b/tests/test_samplers.py index 4977e3ff..c2b6a3ad 100644 --- a/tests/test_samplers.py +++ b/tests/test_samplers.py @@ -18,8 +18,6 @@ from optuna.distributions import CategoricalDistribution from optuna.distributions import FloatDistribution from optuna.distributions import IntDistribution -from optuna.integration.botorch import logei_candidates_func -from optuna.integration.botorch import qei_candidates_func from optuna.samplers import BaseSampler from optuna.samplers._lazy_random_state import LazyRandomState from optuna.study import Study @@ -29,18 +27,23 @@ import pytest +import optuna_integration +from optuna_integration.botorch import logei_candidates_func +from optuna_integration.botorch import qei_candidates_func + + parametrize_sampler = pytest.mark.parametrize( "sampler_class", [ pytest.param( - lambda: optuna.integration.BoTorchSampler( 
+ lambda: optuna_integration.BoTorchSampler( n_startup_trials=0, candidates_func=logei_candidates_func, ), marks=pytest.mark.integration, ), pytest.param( - lambda: optuna.integration.BoTorchSampler( + lambda: optuna_integration.BoTorchSampler( n_startup_trials=0, candidates_func=qei_candidates_func, ), @@ -56,7 +59,7 @@ "multi_objective_sampler_class", [ pytest.param( - lambda: optuna.integration.BoTorchSampler(n_startup_trials=0), + lambda: optuna_integration.BoTorchSampler(n_startup_trials=0), marks=pytest.mark.integration, ), ], @@ -64,7 +67,7 @@ sampler_class_with_seed: dict[str, tuple[Callable[[int], BaseSampler], bool]] = { - "BoTorchSampler": (lambda seed: optuna.integration.BoTorchSampler(seed=seed), True), + "BoTorchSampler": (lambda seed: optuna_integration.BoTorchSampler(seed=seed), True), } param_sampler_with_seed = [] param_sampler_name_with_seed = [] @@ -89,7 +92,7 @@ "sampler_class,expected_has_rng,expected_has_another_sampler", [ pytest.param( - lambda: optuna.integration.BoTorchSampler(n_startup_trials=0), + lambda: optuna_integration.BoTorchSampler(n_startup_trials=0), False, True, marks=pytest.mark.integration, From b185aa4998b5802132f640e661c7613f3954f47f Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 7 Feb 2024 14:35:40 +0900 Subject: [PATCH 7/7] Apply isort. --- tests/test_samplers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_samplers.py b/tests/test_samplers.py index c2b6a3ad..0e311e0c 100644 --- a/tests/test_samplers.py +++ b/tests/test_samplers.py @@ -26,7 +26,6 @@ from optuna.trial import TrialState import pytest - import optuna_integration from optuna_integration.botorch import logei_candidates_func from optuna_integration.botorch import qei_candidates_func