From df0d009fc4867cbe13fd7431b6de03937f4a31e5 Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:58:19 +0900 Subject: [PATCH 1/6] Use isinstance instead of type(...) is ... --- .../integration/_lightgbm_tuner/optimize.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index 6f5bb7cb1d..2096dd3104 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -4,7 +4,9 @@ from collections.abc import Callable from collections.abc import Container from collections.abc import Generator +from collections.abc import Iterable from collections.abc import Iterator +from collections.abc import Sequence import copy import json import os @@ -78,11 +80,11 @@ def _get_metric_for_objective(self) -> str: metric = self.lgbm_params.get("metric", "binary_logloss") # todo (smly): This implementation is different logic from the LightGBM's python bindings. 
- if type(metric) is str: + if isinstance(metric, str): pass - elif type(metric) is list: + elif isinstance(metric, Sequence): metric = metric[-1] - elif type(metric) is set: + elif isinstance(metric, Iterable): metric = list(metric)[-1] else: raise NotImplementedError @@ -95,17 +97,17 @@ def _get_booster_best_score(self, booster: "lgb.Booster") -> float: valid_sets: VALID_SET_TYPE | None = self.lgbm_kwargs.get("valid_sets") if self.lgbm_kwargs.get("valid_names") is not None: - if type(self.lgbm_kwargs["valid_names"]) is str: + if isinstance(self.lgbm_kwargs["valid_names"], str): valid_name = self.lgbm_kwargs["valid_names"] - elif type(self.lgbm_kwargs["valid_names"]) in [list, tuple]: + elif isinstance(self.lgbm_kwargs["valid_names"], Iterable): valid_name = self.lgbm_kwargs["valid_names"][-1] else: raise NotImplementedError - elif type(valid_sets) is lgb.Dataset: + elif isinstance(valid_sets, lgb.Dataset): valid_name = "valid_0" - elif isinstance(valid_sets, (list, tuple)) and len(valid_sets) > 0: + elif isinstance(valid_sets, Iterable) and len(valid_sets) > 0: valid_set_idx = len(valid_sets) - 1 valid_name = "valid_{}".format(valid_set_idx) @@ -130,9 +132,9 @@ def _metric_with_eval_at(self, metric: str) -> str: eval_at = [1, 2, 3, 4, 5] # Optuna can handle only a single metric. Choose first one. - if type(eval_at) in [list, tuple]: + if isinstance(eval_at, Iterable): return "{}@{}".format(metric, eval_at[0]) - if type(eval_at) is int: + if isinstance(eval_at, int): return "{}@{}".format(metric, eval_at) raise ValueError( "The value of eval_at is expected to be int or a list/tuple of int." 
From 5657937033852bfa502ae367a2162a545f31e22f Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:17:13 +0900 Subject: [PATCH 2/6] Fix mypy error --- optuna/integration/_lightgbm_tuner/optimize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index 2096dd3104..9ddef19291 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -99,7 +99,7 @@ def _get_booster_best_score(self, booster: "lgb.Booster") -> float: if self.lgbm_kwargs.get("valid_names") is not None: if isinstance(self.lgbm_kwargs["valid_names"], str): valid_name = self.lgbm_kwargs["valid_names"] - elif isinstance(self.lgbm_kwargs["valid_names"], Iterable): + elif isinstance(self.lgbm_kwargs["valid_names"], Sequence): valid_name = self.lgbm_kwargs["valid_names"][-1] else: raise NotImplementedError @@ -107,7 +107,7 @@ def _get_booster_best_score(self, booster: "lgb.Booster") -> float: elif isinstance(valid_sets, lgb.Dataset): valid_name = "valid_0" - elif isinstance(valid_sets, Iterable) and len(valid_sets) > 0: + elif isinstance(valid_sets, Sequence) and len(valid_sets) > 0: valid_set_idx = len(valid_sets) - 1 valid_name = "valid_{}".format(valid_set_idx) @@ -132,8 +132,8 @@ def _metric_with_eval_at(self, metric: str) -> str: eval_at = [1, 2, 3, 4, 5] # Optuna can handle only a single metric. Choose first one. 
- if isinstance(eval_at, Iterable): - return "{}@{}".format(metric, eval_at[0]) + if isinstance(eval_at, Sequence): + return f"{metric}@{eval_at[0]}" if isinstance(eval_at, int): return "{}@{}".format(metric, eval_at) raise ValueError( From dc3d65b2a0803f005079ee97e1dc85bc020361f5 Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:20:10 +0900 Subject: [PATCH 3/6] Make _metric_with_eval_at() easier to read --- .../integration/_lightgbm_tuner/optimize.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index 9ddef19291..95c27faa6c 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -118,27 +118,27 @@ def _get_booster_best_score(self, booster: "lgb.Booster") -> float: return val_score def _metric_with_eval_at(self, metric: str) -> str: - if metric != "ndcg" and metric != "map": + # The parameter eval_at is only available when the metric is ndcg or map. + if metric not in ["ndcg", "map"]: return metric - eval_at = self.lgbm_params.get("eval_at") - if eval_at is None: - eval_at = self.lgbm_params.get("{}_at".format(metric)) - if eval_at is None: - eval_at = self.lgbm_params.get("{}_eval_at".format(metric)) - if eval_at is None: - # Set default value of LightGBM. + eval_at = ( + self.lgbm_params.get("eval_at") + or self.lgbm_params.get(f"{metric}_at") + or self.lgbm_params.get(f"{metric}_eval_at") + # Fall back to the LightGBM default value when none of the possible keys is present. # See https://lightgbm.readthedocs.io/en/latest/Parameters.html#eval_at. - eval_at = [1, 2, 3, 4, 5] + or [1, 2, 3, 4, 5] + ) # Optuna can handle only a single metric. Choose first one.
if isinstance(eval_at, Sequence): return f"{metric}@{eval_at[0]}" if isinstance(eval_at, int): - return "{}@{}".format(metric, eval_at) + return f"{metric}@{eval_at}" raise ValueError( - "The value of eval_at is expected to be int or a list/tuple of int." - "'{}' is specified.".format(eval_at) + f"The value of eval_at is expected to be int or a list/tuple of int. '{eval_at}' is " + "specified." ) def higher_is_better(self) -> bool: From b87ba050b78e994b967ef3c6e33f4d6c01819e3e Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:20:25 +0900 Subject: [PATCH 4/6] Use f-string instead of format --- .../integration/_lightgbm_tuner/optimize.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index 95c27faa6c..a23b8c1230 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -109,7 +109,7 @@ def _get_booster_best_score(self, booster: "lgb.Booster") -> float: elif isinstance(valid_sets, Sequence) and len(valid_sets) > 0: valid_set_idx = len(valid_sets) - 1 - valid_name = "valid_{}".format(valid_set_idx) + valid_name = f"valid_{valid_set_idx}" else: raise NotImplementedError @@ -248,10 +248,10 @@ def __call__(self, trial: optuna.trial.Trial) -> float: average_iteration_time = elapsed_secs / booster.current_iteration() if self.model_dir is not None: - path = os.path.join(self.model_dir, "{}.pkl".format(trial.number)) + path = os.path.join(self.model_dir, f"{trial.number}.pkl") with open(path, "wb") as fout: pickle.dump(booster, fout) - _logger.info("The booster of trial#{} was saved as {}.".format(trial.number, path)) + _logger.info(f"The booster of trial#{trial.number} was saved as {path}.") if self.compare_validation_metrics(val_score, self.best_score): self.best_score = val_score @@ -328,14 +328,14 @@ def 
__call__(self, trial: optuna.trial.Trial) -> float: average_iteration_time = elapsed_secs / len(val_scores) if self.model_dir is not None and self.lgbm_kwargs.get("return_cvbooster"): - path = os.path.join(self.model_dir, "{}.pkl".format(trial.number)) + path = os.path.join(self.model_dir, f"{trial.number}.pkl") with open(path, "wb") as fout: # At version `lightgbm==3.0.0`, :class:`lightgbm.CVBooster` does not # have `__getstate__` which is required for pickle serialization. cvbooster = cv_results["cvbooster"] assert isinstance(cvbooster, lgb.CVBooster) pickle.dump((cvbooster.boosters, cvbooster.best_iteration), fout) - _logger.info("The booster of trial#{} was saved as {}.".format(trial.number, path)) + _logger.info(f"The booster of trial#{trial.number} was saved as {path}.") if self.compare_validation_metrics(val_score, self.best_score): self.best_score = val_score @@ -420,15 +420,15 @@ def __init__( if self.study.direction != optuna.study.StudyDirection.MAXIMIZE: metric_name = self.lgbm_params.get("metric", "binary_logloss") raise ValueError( - "Study direction is inconsistent with the metric {}. " - "Please set 'maximize' as the direction.".format(metric_name) + f"Study direction is inconsistent with the metric {metric_name}. " + "Please set 'maximize' as the direction." ) else: if self.study.direction != optuna.study.StudyDirection.MINIMIZE: metric_name = self.lgbm_params.get("metric", "binary_logloss") raise ValueError( - "Study direction is inconsistent with the metric {}. " - "Please set 'minimize' as the direction.".format(metric_name) + f"Study direction is inconsistent with the metric {metric_name}. " + "Please set 'minimize' as the direction." 
) if verbosity is not None: @@ -845,12 +845,12 @@ def get_best_booster(self) -> "lgb.Booster": ) best_trial = self.study.best_trial - path = os.path.join(self._model_dir, "{}.pkl".format(best_trial.number)) + path = os.path.join(self._model_dir, f"{best_trial.number}.pkl") if not os.path.exists(path): raise ValueError( - "The best booster cannot be found in {}. If you execute `LightGBMTuner` in " - "distributed environment, please use network file system (e.g., NFS) to share " - "models with multiple workers.".format(self._model_dir) + f"The best booster cannot be found in {self._model_dir}. If you execute " + "`LightGBMTuner` in distributed environment, please use network file system " + "(e.g., NFS) to share models with multiple workers." ) with open(path, "rb") as fin: @@ -1042,12 +1042,12 @@ def get_best_booster(self) -> "lgb.CVBooster": ) best_trial = self.study.best_trial - path = os.path.join(self._model_dir, "{}.pkl".format(best_trial.number)) + path = os.path.join(self._model_dir, f"{best_trial.number}.pkl") if not os.path.exists(path): raise ValueError( - "The best booster cannot be found in {}. If you execute `LightGBMTunerCV` in " - "distributed environment, please use network file system (e.g., NFS) to share " - "models with multiple workers.".format(self._model_dir) + f"The best booster cannot be found in {self._model_dir}. If you execute " + "`LightGBMTunerCV` in distributed environment, please use network file system " + "(e.g., NFS) to share models with multiple workers." 
) with open(path, "rb") as fin: From 80700a3c8a363d8b52075f5ebceb5ed7c289ba1f Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:24:07 +0900 Subject: [PATCH 5/6] Make _check_target_names_supported() easier to read --- optuna/integration/_lightgbm_tuner/optimize.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index a23b8c1230..b0ef1bfc7e 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -184,18 +184,12 @@ def __init__( self.pbar_fmt = "{}, val_score: {:.6f}" def _check_target_names_supported(self) -> None: - supported_param_names = [ - "lambda_l1", - "lambda_l2", - "num_leaves", - "feature_fraction", - "bagging_fraction", - "bagging_freq", - "min_child_samples", - ] for target_param_name in self.target_param_names: - if target_param_name not in supported_param_names: - raise NotImplementedError("Parameter `{}` is not supported for tuning.") + if target_param_name in _DEFAULT_LIGHTGBM_PARAMETERS: + continue + raise NotImplementedError( + f"Parameter `{target_param_name}` is not supported for tuning." 
+ ) def _preprocess(self, trial: optuna.trial.Trial) -> None: if self.pbar is not None: From e0f6505b53c41056d470f11bea2a03f734b2e03c Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 8 Sep 2023 17:59:32 +0900 Subject: [PATCH 6/6] Raise error when eval_at is str --- optuna/integration/_lightgbm_tuner/optimize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py index b0ef1bfc7e..098c41c1d4 100644 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ b/optuna/integration/_lightgbm_tuner/optimize.py @@ -132,7 +132,7 @@ def _metric_with_eval_at(self, metric: str) -> str: ) # Optuna can handle only a single metric. Choose first one. - if isinstance(eval_at, Sequence): + if isinstance(eval_at, (list, tuple)): return f"{metric}@{eval_at[0]}" if isinstance(eval_at, int): return f"{metric}@{eval_at}"