From 3c97410d07c7d14bb17fbc91dec4b4698c12399b Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 20:35:24 -0600
Subject: [PATCH 01/12] unpack into named variables

---
 python-package/lightgbm/engine.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index dca6b607cdc7..9b01d17656d2 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -585,10 +585,11 @@ def _agg_cv_result(
     metric_type: Dict[str, bool] = {}
     for one_result in raw_results:
         for one_line in one_result:
-            key = f"{one_line[0]} {one_line[1]}"
-            metric_type[key] = one_line[3]
+            dataset_name, metric_name, metric_value, is_higher_better = one_line
+            key = f"{dataset_name} {metric_name}"
+            metric_type[key] = is_higher_better
             cvmap.setdefault(key, [])
-            cvmap[key].append(one_line[2])
+            cvmap[key].append(metric_value)
     return [("cv_agg", k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]
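
Note: the 4-tuples being unpacked in this first commit are LightGBM's per-dataset evaluation records, (dataset_name, metric_name, metric_value, is_higher_better), produced for each CV fold by the eval_valid() call seen later in cv(). A rough, self-contained sketch of the aggregation _agg_cv_result() performs; the sample values and helper names below are invented for illustration:

    from collections import OrderedDict
    from typing import Dict, List, Tuple

    import numpy as np

    # one inner list per CV fold
    raw_results = [
        [("valid", "binary_logloss", 0.61, False)],
        [("valid", "binary_logloss", 0.65, False)],
        [("valid", "binary_logloss", 0.63, False)],
    ]

    values: Dict[Tuple[str, str], List[float]] = OrderedDict()
    higher_better: Dict[Tuple[str, str], bool] = {}
    for fold in raw_results:
        for dataset_name, metric_name, metric_value, is_higher_better in fold:
            values.setdefault((dataset_name, metric_name), []).append(metric_value)
            higher_better[(dataset_name, metric_name)] = is_higher_better

    for (dataset_name, metric_name), v in values.items():
        # one aggregated record per (dataset, metric): mean and std dev across folds
        print(dataset_name, metric_name, float(np.mean(v)), higher_better[(dataset_name, metric_name)], float(np.std(v)))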

From 3c94663b868bc0970149ee7b4326ed61a5e74db4 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 21:44:39 -0600
Subject: [PATCH 02/12] still working

---
 python-package/lightgbm/callback.py | 30 +++++++++++------------
 python-package/lightgbm/engine.py   | 37 +++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index c64fb8ba755b..7544b0a1d12b 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -143,16 +143,13 @@ def _init(self, env: CallbackEnv) -> None:
             )
         self.eval_result.clear()
         for item in env.evaluation_result_list:
-            if len(item) == 4:  # regular train
-                data_name, eval_name = item[:2]
-            else:  # cv
-                data_name, eval_name = item[1].split()
-            self.eval_result.setdefault(data_name, OrderedDict())
+            dataset_name, metric_name, *_ = item
+            self.eval_result.setdefault(dataset_name, OrderedDict())
             if len(item) == 4:
-                self.eval_result[data_name].setdefault(eval_name, [])
+                self.eval_result[dataset_name].setdefault(metric_name, [])
             else:
-                self.eval_result[data_name].setdefault(f"{eval_name}-mean", [])
-                self.eval_result[data_name].setdefault(f"{eval_name}-stdv", [])
+                self.eval_result[dataset_name].setdefault(f"{metric_name}-mean", [])
+                self.eval_result[dataset_name].setdefault(f"{metric_name}-stdv", [])

     def __call__(self, env: CallbackEnv) -> None:
         if env.iteration == env.begin_iteration:
@@ -163,15 +160,16 @@ def __call__(self, env: CallbackEnv) -> None:
                 "Please report it at https://github.com/microsoft/LightGBM/issues"
             )
         for item in env.evaluation_result_list:
+            # for cv(), 'metric_value' is actually a mean of metric values over all CV folds
+            dataset_name, metric_name, metric_value, *_ = item
             if len(item) == 4:
-                data_name, eval_name, result = item[:3]
-                self.eval_result[data_name][eval_name].append(result)
+                # train()
+                self.eval_result[dataset_name][metric_name].append(metric_value)
             else:
-                data_name, eval_name = item[1].split()
-                res_mean = item[2]
-                res_stdv = item[4]  # type: ignore[misc]
-                self.eval_result[data_name][f"{eval_name}-mean"].append(res_mean)
-                self.eval_result[data_name][f"{eval_name}-stdv"].append(res_stdv)
+                # cv()
+                metric_std_dev = item[4]  # type: ignore[misc]
+                self.eval_result[dataset_name][f"{metric_name}-mean"].append(metric_value)
+                self.eval_result[dataset_name][f"{metric_name}-stdv"].append(metric_std_dev)


 def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
@@ -308,7 +306,7 @@ def _is_train_set(self, ds_name: str, eval_name: str, env: CallbackEnv) -> bool:
         """Check, by name, if a given Dataset is the training data."""
         # for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
         # and those metrics are considered for early stopping
-        if ds_name == "cv_agg" and eval_name == "train":
+        if env.model.__class__.__name__ == "CVBooster" and eval_name == "train":
             return True

         # for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 9b01d17656d2..8a2e03a7c5a0 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -581,16 +581,33 @@ def _agg_cv_result(
     raw_results: List[List[_LGBM_BoosterEvalMethodResultType]],
 ) -> List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]:
     """Aggregate cross-validation results."""
-    cvmap: Dict[str, List[float]] = OrderedDict()
-    metric_type: Dict[str, bool] = {}
+    # build up a map of the form
+    #
+    # {
+    #     (<dataset_name>, <metric_name>): {
+    #         <is_higher_better>: bool,
+    #         <metric_value>: list[float],
+    #     }
+    # }
+    #
+    cvmap: Dict[Tuple[str, str], List[float]] = OrderedDict()
     for one_result in raw_results:
         for one_line in one_result:
             dataset_name, metric_name, metric_value, is_higher_better = one_line
-            key = f"{dataset_name} {metric_name}"
-            metric_type[key] = is_higher_better
-            cvmap.setdefault(key, [])
-            cvmap[key].append(metric_value)
+            key = (dataset_name, metric_name)
+            cvmap.setdefault(key, defaultdict(list))
+            cvmap[key]["is_higher_better"] = is_higher_better
+            cvmap[key]["values"].append(metric_value)
+
+    # turn that into a list of tuples of the form:
+    #
+    # [
+    #     (<dataset_name>, <metric_name>, mean(<metric_value>), <is_higher_better>, std_dev(<metric_value>))
+    # ]
-    return [("cv_agg", k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]
+    return [
+        (k[0], k[1], float(np.mean(v["values"])), v["is_higher_better"], float(np.std(v["values"])))
+        for k, v in cvmap.items()
+    ]


 def cv(
@@ -813,9 +830,9 @@ def cv(
         )
         cvfolds.update(fobj=fobj)  # type: ignore[call-arg]
         res = _agg_cv_result(cvfolds.eval_valid(feval))  # type: ignore[call-arg]
-        for _, key, mean, _, std in res:
-            results[f"{key}-mean"].append(mean)
-            results[f"{key}-stdv"].append(std)
+        for dataset_name, metric_name, metric_mean, _, metric_std_dev in res:
+            results[f"{dataset_name} {metric_name}-mean"].append(metric_mean)
+            results[f"{dataset_name} {metric_name}-stdv"].append(metric_std_dev)
         try:
             for cb in callbacks_after_iter:
                 cb(
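
Note: with this commit the cv() results dictionary keeps using keys of the form "<dataset name> <metric name>-mean" / "-stdv" (e.g. "valid binary_logloss-mean"), which the tests added later in this series assert. A minimal usage sketch assuming the usual lgb.cv() interface; the data and parameters here are made up:

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng(42)
    X = rng.normal(size=(500, 5))
    y = rng.integers(0, 2, size=500)

    cv_results = lgb.cv(
        params={"objective": "binary", "verbosity": -1},
        train_set=lgb.Dataset(X, label=y),
        num_boost_round=10,
        nfold=3,
    )
    # e.g. ["valid binary_logloss-mean", "valid binary_logloss-stdv"], one list of values per key
    print(sorted(cv_results.keys()))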

From 57a080f5bf7a9b3240c866ad5ffdba7f5127d016 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 21:51:11 -0600
Subject: [PATCH 03/12] more simplification

---
 python-package/lightgbm/callback.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 7544b0a1d12b..d55315ad0983 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -325,11 +325,14 @@ def _init(self, env: CallbackEnv) -> None:
             _log_warning("Early stopping is not available in dart mode")
             return

+        # get details of the first dataset
+        first_dataset_name, first_metric_name, *_ = env.evaluation_result_list[0]
+
         # validation sets are guaranteed to not be identical to the training data in cv()
         if isinstance(env.model, Booster):
             only_train_set = len(env.evaluation_result_list) == 1 and self._is_train_set(
-                ds_name=env.evaluation_result_list[0][0],
-                eval_name=env.evaluation_result_list[0][1].split(" ")[0],
+                ds_name=first_dataset_name,
+                eval_name=first_dataset_name,
                 env=env,
             )
             if only_train_set:
@@ -368,8 +371,7 @@ def _init(self, env: CallbackEnv) -> None:
                 _log_info(f"Using {self.min_delta} as min_delta for all metrics.")
             deltas = [self.min_delta] * n_datasets * n_metrics

-        # split is needed for "<dataset type> <metric>" case (e.g. "train l1")
-        self.first_metric = env.evaluation_result_list[0][1].split(" ")[-1]
+        self.first_metric = first_metric_name
         for eval_ret, delta in zip(env.evaluation_result_list, deltas):
             self.best_iter.append(0)
             if eval_ret[3]:  # greater is better
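
Note: the reason the unpacking above ends in *_ is that entries of env.evaluation_result_list come in two shapes: train() supplies 4-tuples, while cv() supplies 5-tuples whose last element is the standard deviation across folds (as returned by the refactored _agg_cv_result()). Roughly, with invented numbers:

    # lgb.train():  (dataset_name, metric_name, metric_value, is_higher_better)
    train_entry = ("valid_0", "l2", 0.2543, False)

    # lgb.cv():     the same four fields plus the std dev of the metric across folds
    cv_entry = ("valid", "l2", 0.2543, False, 0.0112)

    dataset_name, metric_name, metric_value, *rest = cv_entry  # works for either shape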
"train l1") - eval_name_splitted = env.evaluation_result_list[i][1].split(" ") - if self.first_metric_only and self.first_metric != eval_name_splitted[-1]: + if self.first_metric_only and self.first_metric != metric_name: continue # use only the first metric for early stopping if self._is_train_set( - ds_name=env.evaluation_result_list[i][0], - eval_name=eval_name_splitted[0], + ds_name=dataset_name, + eval_name=dataset_name, env=env, ): continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train) @@ -430,9 +428,9 @@ def __call__(self, env: CallbackEnv) -> None: ) _log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}") if self.first_metric_only: - _log_info(f"Evaluated only: {eval_name_splitted[-1]}") + _log_info(f"Evaluated only: {metric_name}") raise EarlyStopException(self.best_iter[i], self.best_score_list[i]) - self._final_iteration_check(env, eval_name_splitted, i) + self._final_iteration_check(env=env, metric_name=metric_name, i=i) def _should_enable_early_stopping(stopping_rounds: Any) -> bool: From 4fce6ada30583547854e790b2fe1a78d8e5fe626 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 16 Dec 2024 22:31:08 -0600 Subject: [PATCH 05/12] more refactoring --- python-package/lightgbm/engine.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 8a2e03a7c5a0..64f44751b5d3 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -581,23 +581,25 @@ def _agg_cv_result( raw_results: List[List[_LGBM_BoosterEvalMethodResultType]], ) -> List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]: """Aggregate cross-validation results.""" - # build up a map of the form + # build up 2 maps, of the form: # - # { - # (, ): { - # : bool, - # : list[float], - # } + # OrderedDict{ + # (, ): # } # - cvmap: Dict[Tuple[str, str], List[float]] = OrderedDict() + # OrderedDict{ + # (, ): list[] + # } + # + metric_types: Dict[Tuple[str, str], bool] = OrderedDict() + metric_values: Dict[Tuple[str, str], List[float]] = OrderedDict() for one_result in raw_results: for one_line in one_result: dataset_name, metric_name, metric_value, is_higher_better = one_line key = (dataset_name, metric_name) - cvmap.setdefault(key, defaultdict(list)) - cvmap[key]["is_higher_better"] = is_higher_better - cvmap[key]["values"].append(metric_value) + metric_types[key] = is_higher_better + metric_values.setdefault(key, []) + metric_values[key].append(metric_value) # turn that into a list of tuples of the form: # @@ -605,8 +607,8 @@ def _agg_cv_result( # (, , mean(), , std_dev()) # ] return [ - (k[0], k[1], float(np.mean(v["values"])), v["is_higher_better"], float(np.std(v["values"]))) - for k, v in cvmap.items() + (k[0], k[1], float(np.mean(metric_values[k])), metric_types[k], float(np.std(metric_values[k]))) + for k, v in metric_values.items() ] From 7386897ddc79dd94511427a215c4c8db89928aad Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 16 Dec 2024 22:53:05 -0600 Subject: [PATCH 06/12] simplify _is_train_set() --- python-package/lightgbm/callback.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py index b01e2e29096b..9d57097a32ee 100644 --- a/python-package/lightgbm/callback.py +++ b/python-package/lightgbm/callback.py @@ -302,15 +302,15 @@ def _gt_delta(self, curr_score: float, best_score: float, delta: float) -> 

From 7386897ddc79dd94511427a215c4c8db89928aad Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 22:53:05 -0600
Subject: [PATCH 06/12] simplify _is_train_set()

---
 python-package/lightgbm/callback.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index b01e2e29096b..9d57097a32ee 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -302,15 +302,15 @@ def _gt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
     def _lt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
         return curr_score < best_score - delta

-    def _is_train_set(self, ds_name: str, eval_name: str, env: CallbackEnv) -> bool:
+    def _is_train_set(self, dataset_name: str, env: CallbackEnv) -> bool:
         """Check, by name, if a given Dataset is the training data."""
         # for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
         # and those metrics are considered for early stopping
-        if env.model.__class__.__name__ == "CVBooster" and eval_name == "train":
+        if env.model.__class__.__name__ == "CVBooster" and dataset_name == "train":
             return True

         # for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name
-        if isinstance(env.model, Booster) and ds_name == env.model._train_data_name:
+        if isinstance(env.model, Booster) and dataset_name == env.model._train_data_name:
             return True

         return False
@@ -331,8 +331,7 @@ def _init(self, env: CallbackEnv) -> None:
         # validation sets are guaranteed to not be identical to the training data in cv()
         if isinstance(env.model, Booster):
             only_train_set = len(env.evaluation_result_list) == 1 and self._is_train_set(
-                ds_name=first_dataset_name,
-                eval_name=first_dataset_name,
+                dataset_name=first_dataset_name,
                 env=env,
             )
             if only_train_set:
@@ -416,8 +415,7 @@ def __call__(self, env: CallbackEnv) -> None:
             if self.first_metric_only and self.first_metric != metric_name:
                 continue  # use only the first metric for early stopping
             if self._is_train_set(
-                ds_name=dataset_name,
-                eval_name=dataset_name,
+                dataset_name=dataset_name,
                 env=env,
             ):
                 continue  # train data for lgb.cv or sklearn wrapper (underlying lgb.train)

From cf12c81108e8c8a788baa3a01231560da4cd9e01 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 23:01:02 -0600
Subject: [PATCH 07/12] bit more refactoring

---
 python-package/lightgbm/callback.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 9d57097a32ee..3e990b77f3b9 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -71,6 +71,10 @@ class CallbackEnv:
     evaluation_result_list: Optional[_ListOfEvalResultTuples]


+def _using_cv(env: CallbackEnv) -> bool:
+    return env.__class__.__name__ == "CVBooster"
+
+
 def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
     """Format metric string."""
     dataset_name, metric_name, metric_value, *_ = value
@@ -306,7 +310,7 @@ def _is_train_set(self, dataset_name: str, env: CallbackEnv) -> bool:
         """Check, by name, if a given Dataset is the training data."""
         # for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
         # and those metrics are considered for early stopping
-        if env.model.__class__.__name__ == "CVBooster" and dataset_name == "train":
+        if _using_cv(env) and dataset_name == "train":
             return True

         # for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name

From a989f5f4593356d4a7d91d6d09178177f859faf4 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 23:09:31 -0600
Subject: [PATCH 08/12] update _using_cv() check

---
 python-package/lightgbm/callback.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 3e990b77f3b9..ed4d0e23905f 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -72,7 +72,11 @@ class CallbackEnv:


 def _using_cv(env: CallbackEnv) -> bool:
-    return env.__class__.__name__ == "CVBooster"
+    """Check if model in callback env is a CVBooster"""
+    # this string-matching is used instead of isinstance() to avoid a circular import
+    return env.model.__class__.__name__ == "CVBooster" or any(
+        c.__name__ == "CVBooster" for c in env.model.__class__.__bases__
+    )


 def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:

From 41763270a01d784f01d252abb5928cb5de0e74dc Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 23:44:38 -0600
Subject: [PATCH 09/12] formatting

---
 python-package/lightgbm/callback.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index ed4d0e23905f..be4fde7a487f 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -72,7 +72,7 @@ class CallbackEnv:


 def _using_cv(env: CallbackEnv) -> bool:
-    """Check if model in callback env is a CVBooster"""
+    """Check if model in callback env is a CVBooster."""
     # this string-matching is used instead of isinstance() to avoid a circular import
     return env.model.__class__.__name__ == "CVBooster" or any(
         c.__name__ == "CVBooster" for c in env.model.__class__.__bases__

From d116279bb571f309f011f6d67824866590c4f300 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Tue, 17 Dec 2024 23:26:43 -0600
Subject: [PATCH 10/12] Update python-package/lightgbm/engine.py

Co-authored-by: Nikita Titov
---
 python-package/lightgbm/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 64f44751b5d3..cca29034b39c 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -607,7 +607,7 @@ def _agg_cv_result(
     #     (<dataset_name>, <metric_name>, mean(<metric_value>), <is_higher_better>, std_dev(<metric_value>))
     # ]
     return [
-        (k[0], k[1], float(np.mean(metric_values[k])), metric_types[k], float(np.std(metric_values[k])))
+        (k[0], k[1], float(np.mean(v)), metric_types[k], float(np.std(v)))
         for k, v in metric_values.items()
     ]
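
Note: the class-name comparison kept through the two formatting commits above matches CVBooster itself and, via __bases__, its direct subclasses, but not deeper subclasses; the isinstance() check introduced in the next commit covers the whole inheritance chain. A small illustration with stand-in classes (all names here are invented):

    class CVBooster:  # stand-in for lightgbm.engine.CVBooster
        pass

    class Wrapped(CVBooster):
        pass

    class DoublyWrapped(Wrapped):
        pass

    def name_based_check(model) -> bool:
        return model.__class__.__name__ == "CVBooster" or any(
            c.__name__ == "CVBooster" for c in model.__class__.__bases__
        )

    print(name_based_check(CVBooster()))           # True
    print(name_based_check(Wrapped()))             # True  (direct subclass)
    print(name_based_check(DoublyWrapped()))       # False (indirect subclass)
    print(isinstance(DoublyWrapped(), CVBooster))  # True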

From 5b25aed3dbb191c473f936bdb5a78cc97e8e969e Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Tue, 17 Dec 2024 23:57:35 -0600
Subject: [PATCH 11/12] simplify

---
 python-package/lightgbm/callback.py      | 12 ++++++------
 python-package/lightgbm/engine.py        |  3 +--
 tests/python_package_test/test_engine.py | 21 +++++++++++++++++++++
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index be4fde7a487f..be8d04ed7a5e 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -71,12 +71,12 @@ class CallbackEnv:
     evaluation_result_list: Optional[_ListOfEvalResultTuples]


-def _using_cv(env: CallbackEnv) -> bool:
+def _is_using_cv(env: CallbackEnv) -> bool:
     """Check if model in callback env is a CVBooster."""
-    # this string-matching is used instead of isinstance() to avoid a circular import
-    return env.model.__class__.__name__ == "CVBooster" or any(
-        c.__name__ == "CVBooster" for c in env.model.__class__.__bases__
-    )
+    # this import is here to avoid a circular import
+    from .engine import CVBooster
+
+    return isinstance(env.model, CVBooster)


 def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
@@ -314,7 +314,7 @@ def _is_train_set(self, dataset_name: str, env: CallbackEnv) -> bool:
         """Check, by name, if a given Dataset is the training data."""
         # for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
         # and those metrics are considered for early stopping
-        if _using_cv(env) and dataset_name == "train":
+        if _is_using_cv(env) and dataset_name == "train":
             return True

         # for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 64f44751b5d3..55ba333fa6a6 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -594,8 +594,7 @@ def _agg_cv_result(
     metric_types: Dict[Tuple[str, str], bool] = OrderedDict()
     metric_values: Dict[Tuple[str, str], List[float]] = OrderedDict()
     for one_result in raw_results:
-        for one_line in one_result:
-            dataset_name, metric_name, metric_value, is_higher_better = one_line
+        for dataset_name, metric_name, metric_value, is_higher_better in one_result:
             key = (dataset_name, metric_name)
             metric_types[key] = is_higher_better
             metric_values.setdefault(key, [])
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 05afddb77c77..a1797d1c1187 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -64,6 +64,13 @@ def constant_metric(preds, train_data):
     return ("error", 0.0, False)


+def constant_metric_multi(preds, train_data):
+    return [
+        ("important_metric", 1.5, False),
+        ("irrelevant_metric", 7.8, False),
+    ]
+
+
 def decreasing_metric(preds, train_data):
     return ("decreasing_metric", next(decreasing_generator), False)

@@ -2570,6 +2577,13 @@ def train_booster(params=params_obj_verbose, **kwargs):
     assert "valid binary_logloss-mean" in res
     assert "valid error-mean" in res

+    # default metric in args with 1 custom function returning a list of 2 metrics
+    res = get_cv_result(metrics="binary_logloss", feval=constant_metric_multi)
+    assert len(res) == 6
+    assert "valid binary_logloss-mean" in res
+    assert res["valid important_metric-mean"] == [1.5, 1.5]
+    assert res["valid irrelevant_metric-mean"] == [7.8, 7.8]
+
     # non-default metric in args with custom one
     res = get_cv_result(metrics="binary_error", feval=constant_metric)
     assert len(res) == 4
@@ -2703,6 +2717,13 @@ def train_booster(params=params_obj_verbose, **kwargs):
     assert "binary_logloss" in evals_result["valid_0"]
     assert "error" in evals_result["valid_0"]

+    # default metric in params with custom function returning a list of 2 metrics
+    train_booster(params=params_obj_metric_log_verbose, feval=constant_metric_multi)
+    assert len(evals_result["valid_0"]) == 3
+    assert "binary_logloss" in evals_result["valid_0"]
+    assert evals_result["valid_0"]["important_metric"] == [1.5, 1.5]
+    assert evals_result["valid_0"]["irrelevant_metric"] == [7.8, 7.8]
+
     # non-default metric in params with custom one
     train_booster(params=params_obj_metric_err_verbose, feval=constant_metric)
     assert len(evals_result["valid_0"]) == 2
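
Note: constant_metric_multi exercises the fact that a custom feval callable may return either a single (name, value, is_higher_better) tuple or a list of such tuples. A usage sketch outside the test suite; the helper function, data, and parameters are invented:

    import numpy as np
    import lightgbm as lgb

    def two_metrics(preds, eval_data):
        y_true = eval_data.get_label()
        hard_error = float(np.mean((y_true > 0.5) != (preds > 0.5)))
        return [
            ("hard_error", hard_error, False),           # lower is better
            ("n_eval_rows", float(len(y_true)), False),
        ]

    rng = np.random.default_rng(0)
    X = rng.normal(size=(400, 5))
    y = rng.integers(0, 2, size=400)

    cv_results = lgb.cv(
        params={"objective": "binary", "verbosity": -1},
        train_set=lgb.Dataset(X, label=y),
        num_boost_round=5,
        nfold=3,
        feval=two_metrics,
    )
    # both custom metrics appear next to the built-in one, e.g.
    # "valid hard_error-mean" and "valid n_eval_rows-mean" (plus the matching "-stdv" keys)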

From b8220b75e579fa460c4a3e3ac7b5db1945427ee6 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 18 Dec 2024 00:04:49 -0600
Subject: [PATCH 12/12] ruff auto-formatting

---
 python-package/lightgbm/engine.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 898dbb91a16e..ccbb4376a89f 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -605,10 +605,7 @@ def _agg_cv_result(
     # [
     #     (<dataset_name>, <metric_name>, mean(<metric_value>), <is_higher_better>, std_dev(<metric_value>))
     # ]
-    return [
-        (k[0], k[1], float(np.mean(v)), metric_types[k], float(np.std(v)))
-        for k, v in metric_values.items()
-    ]
+    return [(k[0], k[1], float(np.mean(v)), metric_types[k], float(np.std(v))) for k, v in metric_values.items()]


 def cv(
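
Note: the record_evaluation() changes earlier in this series key the nested result dict by dataset name and metric name for both APIs; with cv() the per-metric lists get "-mean" and "-stdv" suffixes. A short sketch (data and parameters invented):

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng(7)
    X = rng.normal(size=(300, 4))
    y = rng.integers(0, 2, size=300)

    history = {}
    lgb.cv(
        params={"objective": "binary", "verbosity": -1},
        train_set=lgb.Dataset(X, label=y),
        num_boost_round=5,
        nfold=3,
        callbacks=[lgb.record_evaluation(history)],
    )
    print(list(history))           # ["valid"]
    print(list(history["valid"]))  # ["binary_logloss-mean", "binary_logloss-stdv"]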