Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python-package] stop relying on string concatenation / splitting for cv() eval results #6761

Merged
merged 14 commits into from
Dec 22, 2024
72 changes: 38 additions & 34 deletions python-package/lightgbm/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ class CallbackEnv:
evaluation_result_list: Optional[_ListOfEvalResultTuples]


def _is_using_cv(env: CallbackEnv) -> bool:
    """Return whether the model carried by the callback env is a ``CVBooster``."""
    # deferred to call time: importing .engine at module level would be circular
    from .engine import CVBooster

    model = env.model
    return isinstance(model, CVBooster)


def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
"""Format metric string."""
dataset_name, metric_name, metric_value, *_ = value
Expand Down Expand Up @@ -143,16 +151,13 @@ def _init(self, env: CallbackEnv) -> None:
)
self.eval_result.clear()
for item in env.evaluation_result_list:
if len(item) == 4: # regular train
data_name, eval_name = item[:2]
else: # cv
data_name, eval_name = item[1].split()
self.eval_result.setdefault(data_name, OrderedDict())
dataset_name, metric_name, *_ = item
self.eval_result.setdefault(dataset_name, OrderedDict())
if len(item) == 4:
self.eval_result[data_name].setdefault(eval_name, [])
self.eval_result[dataset_name].setdefault(metric_name, [])
else:
self.eval_result[data_name].setdefault(f"{eval_name}-mean", [])
self.eval_result[data_name].setdefault(f"{eval_name}-stdv", [])
self.eval_result[dataset_name].setdefault(f"{metric_name}-mean", [])
self.eval_result[dataset_name].setdefault(f"{metric_name}-stdv", [])

def __call__(self, env: CallbackEnv) -> None:
if env.iteration == env.begin_iteration:
Expand All @@ -163,15 +168,16 @@ def __call__(self, env: CallbackEnv) -> None:
"Please report it at https://github.com/microsoft/LightGBM/issues"
)
for item in env.evaluation_result_list:
# for cv(), 'metric_value' is actually a mean of metric values over all CV folds
dataset_name, metric_name, metric_value, *_ = item
if len(item) == 4:
data_name, eval_name, result = item[:3]
self.eval_result[data_name][eval_name].append(result)
# train()
self.eval_result[dataset_name][metric_name].append(metric_value)
else:
data_name, eval_name = item[1].split()
res_mean = item[2]
res_stdv = item[4] # type: ignore[misc]
self.eval_result[data_name][f"{eval_name}-mean"].append(res_mean)
self.eval_result[data_name][f"{eval_name}-stdv"].append(res_stdv)
# cv()
metric_std_dev = item[4] # type: ignore[misc]
self.eval_result[dataset_name][f"{metric_name}-mean"].append(metric_value)
self.eval_result[dataset_name][f"{metric_name}-stdv"].append(metric_std_dev)


def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
Expand Down Expand Up @@ -304,15 +310,15 @@ def _gt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
def _lt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
return curr_score < best_score - delta

def _is_train_set(self, ds_name: str, eval_name: str, env: CallbackEnv) -> bool:
def _is_train_set(self, dataset_name: str, env: CallbackEnv) -> bool:
"""Check, by name, if a given Dataset is the training data."""
# for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
# and those metrics are considered for early stopping
if ds_name == "cv_agg" and eval_name == "train":
if _is_using_cv(env) and dataset_name == "train":
return True

# for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name
if isinstance(env.model, Booster) and ds_name == env.model._train_data_name:
if isinstance(env.model, Booster) and dataset_name == env.model._train_data_name:
return True

return False
Expand All @@ -327,11 +333,13 @@ def _init(self, env: CallbackEnv) -> None:
_log_warning("Early stopping is not available in dart mode")
return

# get details of the first dataset
first_dataset_name, first_metric_name, *_ = env.evaluation_result_list[0]

# validation sets are guaranteed to not be identical to the training data in cv()
if isinstance(env.model, Booster):
only_train_set = len(env.evaluation_result_list) == 1 and self._is_train_set(
ds_name=env.evaluation_result_list[0][0],
eval_name=env.evaluation_result_list[0][1].split(" ")[0],
dataset_name=first_dataset_name,
env=env,
)
if only_train_set:
Expand Down Expand Up @@ -370,8 +378,7 @@ def _init(self, env: CallbackEnv) -> None:
_log_info(f"Using {self.min_delta} as min_delta for all metrics.")
deltas = [self.min_delta] * n_datasets * n_metrics

# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
self.first_metric = env.evaluation_result_list[0][1].split(" ")[-1]
self.first_metric = first_metric_name
for eval_ret, delta in zip(env.evaluation_result_list, deltas):
self.best_iter.append(0)
if eval_ret[3]: # greater is better
Expand All @@ -381,15 +388,15 @@ def _init(self, env: CallbackEnv) -> None:
self.best_score.append(float("inf"))
self.cmp_op.append(partial(self._lt_delta, delta=delta))

def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str], i: int) -> None:
def _final_iteration_check(self, *, env: CallbackEnv, metric_name: str, i: int) -> None:
if env.iteration == env.end_iteration - 1:
if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
)
if self.first_metric_only:
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
_log_info(f"Evaluated only: {metric_name}")
raise EarlyStopException(self.best_iter[i], self.best_score_list[i])

def __call__(self, env: CallbackEnv) -> None:
Expand All @@ -405,21 +412,18 @@ def __call__(self, env: CallbackEnv) -> None:
# self.best_score_list is initialized to an empty list
first_time_updating_best_score_list = self.best_score_list == []
for i in range(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2]
if first_time_updating_best_score_list or self.cmp_op[i](score, self.best_score[i]):
self.best_score[i] = score
dataset_name, metric_name, metric_value, *_ = env.evaluation_result_list[i]
if first_time_updating_best_score_list or self.cmp_op[i](metric_value, self.best_score[i]):
self.best_score[i] = metric_value
self.best_iter[i] = env.iteration
if first_time_updating_best_score_list:
self.best_score_list.append(env.evaluation_result_list)
else:
self.best_score_list[i] = env.evaluation_result_list
# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
if self.first_metric_only and self.first_metric != metric_name:
continue # use only the first metric for early stopping
if self._is_train_set(
ds_name=env.evaluation_result_list[i][0],
eval_name=eval_name_splitted[0],
dataset_name=dataset_name,
env=env,
):
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
Expand All @@ -430,9 +434,9 @@ def __call__(self, env: CallbackEnv) -> None:
)
_log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
if self.first_metric_only:
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
_log_info(f"Evaluated only: {metric_name}")
raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
self._final_iteration_check(env, eval_name_splitted, i)
self._final_iteration_check(env=env, metric_name=metric_name, i=i)


def _should_enable_early_stopping(stopping_rounds: Any) -> bool:
Expand Down
38 changes: 27 additions & 11 deletions python-package/lightgbm/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,15 +581,31 @@ def _agg_cv_result(
raw_results: List[List[_LGBM_BoosterEvalMethodResultType]],
) -> List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]:
"""Aggregate cross-validation results."""
cvmap: Dict[str, List[float]] = OrderedDict()
metric_type: Dict[str, bool] = {}
# build up 2 maps, of the form:
#
# OrderedDict{
# (<dataset_name>, <metric_name>): <is_higher_better>
# }
#
# OrderedDict{
# (<dataset_name>, <metric_name>): list[<metric_value>]
# }
#
metric_types: Dict[Tuple[str, str], bool] = OrderedDict()
metric_values: Dict[Tuple[str, str], List[float]] = OrderedDict()
for one_result in raw_results:
for one_line in one_result:
key = f"{one_line[0]} {one_line[1]}"
metric_type[key] = one_line[3]
cvmap.setdefault(key, [])
cvmap[key].append(one_line[2])
return [("cv_agg", k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, removing this "cv_agg" string literal, is the key change... everything else flows from that.

for dataset_name, metric_name, metric_value, is_higher_better in one_result:
key = (dataset_name, metric_name)
metric_types[key] = is_higher_better
metric_values.setdefault(key, [])
metric_values[key].append(metric_value)

# turn that into a list of tuples of the form:
#
# [
# (<dataset_name>, <metric_name>, mean(<values>), <is_higher_better>, std_dev(<values>))
# ]
return [(k[0], k[1], float(np.mean(v)), metric_types[k], float(np.std(v))) for k, v in metric_values.items()]


def cv(
Expand Down Expand Up @@ -812,9 +828,9 @@ def cv(
)
cvbooster.update(fobj=fobj) # type: ignore[call-arg]
res = _agg_cv_result(cvbooster.eval_valid(feval)) # type: ignore[call-arg]
for _, key, mean, _, std in res:
results[f"{key}-mean"].append(mean)
results[f"{key}-stdv"].append(std)
for dataset_name, metric_name, metric_mean, _, metric_std_dev in res:
results[f"{dataset_name} {metric_name}-mean"].append(metric_mean)
results[f"{dataset_name} {metric_name}-stdv"].append(metric_std_dev)
try:
for cb in callbacks_after_iter:
cb(
Expand Down
21 changes: 21 additions & 0 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def constant_metric(preds, train_data):
return ("error", 0.0, False)


def constant_metric_multi(preds, train_data):
    """Custom eval function that ignores its inputs and always returns the same two metrics.

    Each entry is a 3-tuple; presumably (metric_name, metric_value, is_higher_better).
    """
    important = ("important_metric", 1.5, False)
    irrelevant = ("irrelevant_metric", 7.8, False)
    return [important, irrelevant]


def decreasing_metric(preds, train_data):
    """Custom eval function whose value is the next item drawn from ``decreasing_generator``.

    Both arguments are ignored. Each call advances the module-level generator by one step.
    """
    current_value = next(decreasing_generator)
    return ("decreasing_metric", current_value, False)

Expand Down Expand Up @@ -2570,6 +2577,13 @@ def train_booster(params=params_obj_verbose, **kwargs):
assert "valid binary_logloss-mean" in res
assert "valid error-mean" in res

# default metric in args with 1 custom function returning a list of 2 metrics
res = get_cv_result(metrics="binary_logloss", feval=constant_metric_multi)
assert len(res) == 6
assert "valid binary_logloss-mean" in res
assert res["valid important_metric-mean"] == [1.5, 1.5]
assert res["valid irrelevant_metric-mean"] == [7.8, 7.8]

# non-default metric in args with custom one
res = get_cv_result(metrics="binary_error", feval=constant_metric)
assert len(res) == 4
Expand Down Expand Up @@ -2703,6 +2717,13 @@ def train_booster(params=params_obj_verbose, **kwargs):
assert "binary_logloss" in evals_result["valid_0"]
assert "error" in evals_result["valid_0"]

# default metric in params with custom function returning a list of 2 metrics
train_booster(params=params_obj_metric_log_verbose, feval=constant_metric_multi)
assert len(evals_result["valid_0"]) == 3
assert "binary_logloss" in evals_result["valid_0"]
assert evals_result["valid_0"]["important_metric"] == [1.5, 1.5]
assert evals_result["valid_0"]["irrelevant_metric"] == [7.8, 7.8]

# non-default metric in params with custom one
train_booster(params=params_obj_metric_err_verbose, feval=constant_metric)
assert len(evals_result["valid_0"]) == 2
Expand Down
Loading