From d4325c5a4428b120d8611b6cba9b821f3a5f3b12 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 6 Sep 2020 02:41:37 +0100 Subject: [PATCH] [R-package] added support for first_metric_only (fixes #2368) (#2912) * [R-package] started implementing first_metric_only * trying stuff * more changes * fixed handling of multiple metrics * fixed tests * remove duplicate tests * get training tests * fixes for lgb.cv() * fixes for lgb.cv() * fix linting --- R-package/R/aliases.R | 14 + R-package/R/callback.R | 12 +- R-package/R/lgb.cv.R | 38 +- R-package/R/lgb.train.R | 47 ++- R-package/R/lightgbm.R | 51 +++ R-package/R/utils.R | 33 +- R-package/man/lgb.cv.Rd | 55 ++- R-package/man/lgb.train.Rd | 51 ++- R-package/man/lgb_shared_params.Rd | 55 +++ R-package/man/lightgbm.Rd | 17 + R-package/tests/testthat/test_basic.R | 529 +++++++++++++++++++++++++- R-package/tests/testthat/test_utils.R | 9 + 12 files changed, 871 insertions(+), 40 deletions(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 09cb86629872..8176125b6f2f 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -108,3 +108,17 @@ ) return(c(learning_params, .DATASET_PARAMETERS())) } + +# [description] +# Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric, +# a few different strings can be used to indicate "no metrics". +# [returns] +# A character vector +.NO_METRIC_STRINGS <- function() { + return(c( + "na" + , "None" + , "null" + , "custom" + )) +} diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 4edb2d0e19b1..9f56a445db25 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -268,7 +268,7 @@ cb.record.evaluation <- function() { } -cb.early.stop <- function(stopping_rounds, verbose = TRUE) { +cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = TRUE) { # Initialize variables factor_to_bigger_better <- NULL @@ -325,8 +325,16 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) { # Store iteration cur_iter <- env$iteration + # By default, any metric can trigger early stopping. This can be disabled + # with 'first_metric_only = TRUE' + if (isTRUE(first_metric_only)) { + evals_to_check <- 1L + } else { + evals_to_check <- seq_len(eval_len) + } + # Loop through evaluation - for (i in seq_len(eval_len)) { + for (i in evals_to_check) { # Store score score <- env$eval_list[[i]]$value * factor_to_bigger_better[i] diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 4a4157644eb3..671c0f10a850 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -24,10 +24,6 @@ CVBooster <- R6::R6Class( #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param weight vector of response values. If not NULL, will set to dataset -#' @param obj objective function, can be character or custom objective function. Examples include -#' \code{regression}, \code{regression_l1}, \code{huber}, -#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function, can be (list of) character or custom eval function #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param showsd \code{boolean}, whether to show standard deviation of cross validation #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified @@ -52,7 +48,7 @@ CVBooster <- R6::R6Class( #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } -#' +#' @inheritSection lgb_shared_params Early Stopping #' @return a trained model \code{lgb.CVBooster}. #' #' @examples @@ -114,7 +110,7 @@ lgb.cv <- function(params = list() params <- lgb.check.obj(params, obj) params <- lgb.check.eval(params, eval) fobj <- NULL - feval <- NULL + eval_functions <- list(NULL) # Check for objective (function or not) if (is.function(params$objective)) { @@ -122,9 +118,17 @@ lgb.cv <- function(params = list() params$objective <- "NONE" } - # Check for loss (function or not) + # If loss is a single function, store it as a 1-element list + # (for backwards compatibility). If it is a list of functions, store + # all of them if (is.function(eval)) { - feval <- eval + eval_functions <- list(eval) + } + if (methods::is(eval, "list")) { + eval_functions <- Filter( + f = is.function + , x = eval + ) } # Init predictor to empty @@ -266,6 +270,7 @@ lgb.cv <- function(params = list() callbacks , cb.early.stop( stopping_rounds = early_stopping_rounds + , first_metric_only = isTRUE(params[["first_metric_only"]]) , verbose = verbose ) ) @@ -357,7 +362,11 @@ lgb.cv <- function(params = list() # Update one boosting iteration msg <- lapply(cv_booster$boosters, function(fd) { fd$booster$update(fobj = fobj) - fd$booster$eval_valid(feval = feval) + out <- list() + for (eval_function in eval_functions) { + out <- append(out, fd$booster$eval_valid(feval = eval_function)) + } + return(out) }) # Prepare collection of evaluation results @@ -384,7 +393,13 @@ lgb.cv <- function(params = list() # When early stopping is not activated, we compute the best iteration / score ourselves # based on the first first metric if (record && is.na(env$best_score)) { - first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L] + # when using a custom eval function, the metric name is returned from the + # function, so figure it out from record_evals + if (!is.null(eval_functions[1L])) { + first_metric <- names(cv_booster$record_evals[["valid"]])[1L] + } else { + first_metric <- cv_booster$.__enclos_env__$private$eval_names[1L] + } .find_best <- which.min if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) { .find_best <- which.max @@ -576,7 +591,8 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { msg[[i]][[j]]$value })) }) - # Get evaluation + # Get evaluation. Just taking the first element here to + # get structture (name, higher_bettter, data_name) ret_eval <- msg[[1L]] # Go through evaluation length items diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index a5f3b8e0e460..e1637ed3c007 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -3,10 +3,6 @@ #' @description Logic to train with LightGBM #' @inheritParams lgb_shared_params #' @param valids a list of \code{lgb.Dataset} objects, used for validation -#' @param obj objective function, can be character or custom objective function. Examples include -#' \code{regression}, \code{regression_l1}, \code{huber}, -#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function, can be (a list of) character or custom eval function #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param categorical_feature list of str or int @@ -26,6 +22,7 @@ #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } +#' @inheritSection lgb_shared_params Early Stopping #' @return a trained booster model \code{lgb.Booster}. #' #' @examples @@ -90,7 +87,7 @@ lgb.train <- function(params = list(), params <- lgb.check.obj(params, obj) params <- lgb.check.eval(params, eval) fobj <- NULL - feval <- NULL + eval_functions <- list(NULL) # Check for objective (function or not) if (is.function(params$objective)) { @@ -98,9 +95,17 @@ lgb.train <- function(params = list(), params$objective <- "NONE" } - # Check for loss (function or not) + # If loss is a single function, store it as a 1-element list + # (for backwards compatibility). If it is a list of functions, store + # all of them if (is.function(eval)) { - feval <- eval + eval_functions <- list(eval) + } + if (methods::is(eval, "list")) { + eval_functions <- Filter( + f = is.function + , x = eval + ) } # Init predictor to empty @@ -235,6 +240,7 @@ lgb.train <- function(params = list(), callbacks , cb.early.stop( stopping_rounds = early_stopping_rounds + , first_metric_only = isTRUE(params[["first_metric_only"]]) , verbose = verbose ) ) @@ -280,13 +286,28 @@ lgb.train <- function(params = list(), # Collection: Has validation dataset? if (length(valids) > 0L) { - # Validation has training dataset? - if (valid_contain_train) { - eval_list <- append(eval_list, booster$eval_train(feval = feval)) + # Get evaluation results with passed-in functions + for (eval_function in eval_functions) { + + # Validation has training dataset? + if (valid_contain_train) { + eval_list <- append(eval_list, booster$eval_train(feval = eval_function)) + } + + eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) + } + + # Calling booster$eval_valid() will get + # evaluation results with the metrics in params$metric by calling LGBM_BoosterGetEval_R", + # so need to be sure that gets called, which it wouldn't be above if no functions + # were passed in + if (length(eval_functions) == 0L) { + if (valid_contain_train) { + eval_list <- append(eval_list, booster$eval_train(feval = eval_function)) + } + eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) } - # Has no validation dataset - eval_list <- append(eval_list, booster$eval_valid(feval = feval)) } # Write evaluation result in environment @@ -312,7 +333,7 @@ lgb.train <- function(params = list(), # when using a custom eval function, the metric name is returned from the # function, so figure it out from record_evals - if (!is.null(feval)) { + if (!is.null(eval_functions[1L])) { first_metric <- names(booster$record_evals[[first_valid_name]])[1L] } else { first_metric <- booster$.__enclos_env__$private$eval_names[1L] diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 84e2503db2f6..2ea789278af6 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -10,11 +10,61 @@ #' and one metric. If there's more than one, will check all of them #' except the training data. Returns the model with (best_iter + early_stopping_rounds). #' If early stopping occurs, the model will have 'best_iter' field. +#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of +#' strings and functions. +#' +#' \itemize{ +#' \item{\bold{a. character vector}: +#' If you provide a character vector to this argument, it should contain strings with valid +#' evaluation metrics. +#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ +#' The "metric" section of the documentation} +#' for a list of valid metrics. +#' } +#' \item{\bold{b. function}: +#' You can provide a custom evaluation function. This +#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named +#' list with three elements: +#' \itemize{ +#' \item{\code{name}: A string with the name of the metric, used for printing +#' and storing results. +#' } +#' \item{\code{value}: A single number indicating the value of the metric for the +#' given predictions and true values +#' } +#' \item{ +#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. +#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. +#' } +#' } +#' } +#' \item{\bold{c. list}: +#' If a list is given, it should only contain character vectors and functions. +#' These should follow the requirements from the descriptions above. +#' } +#' } #' @param eval_freq evaluation output frequency, only effect when verbose > 0 #' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model #' @param nrounds number of training rounds +#' @param obj objective function, can be character or custom objective function. Examples include +#' \code{regression}, \code{regression_l1}, \code{huber}, +#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} #' @param params List of parameters #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training +#' @section Early Stopping: +#' +#' "early stopping" refers to stopping the training process if the model's performance on a given +#' validation set does not improve for several consecutive iterations. +#' +#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable +#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all +#' metrics will be considered for early stopping. +#' +#' If you want to only consider the first metric for early stopping, pass +#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} +#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, +#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) +#' or \code{objective} (passed into \code{params}). #' @keywords internal NULL @@ -47,6 +97,7 @@ NULL #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } +#' @inheritSection lgb_shared_params Early Stopping #' @export lightgbm <- function(data, label = NULL, diff --git a/R-package/R/utils.R b/R-package/R/utils.R index d961cce8a9ab..b3816a6debd7 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -318,10 +318,10 @@ lgb.check.obj <- function(params, obj) { } # [description] -# make sure that "metric" is populated on params, -# and add any eval values to it -# [return] -# params, where "metric" is a list +# Take any character values from eval and store them in params$metric. +# This has to account for the fact that `eval` could be a character vector, +# a function, a list of functions, or a list with a mix of strings and +# functions lgb.check.eval <- function(params, eval) { if (is.null(params$metric)) { @@ -330,13 +330,30 @@ lgb.check.eval <- function(params, eval) { params$metric <- as.list(params$metric) } - if (is.character(eval)) { - params$metric <- append(params$metric, eval) + # if 'eval' is a character vector or list, find the character + # elements and add them to 'metric' + if (!is.function(eval)) { + for (i in seq_along(eval)) { + element <- eval[[i]] + if (is.character(element)) { + params$metric <- append(params$metric, element) + } + } } - if (identical(class(eval), "list")) { - params$metric <- append(params$metric, unlist(eval)) + # If more than one character metric was given, then "None" should + # not be included + if (length(params$metric) > 1L) { + params$metric <- Filter( + f = function(metric) { + !(metric %in% .NO_METRIC_STRINGS()) + } + , x = params$metric + ) } + # duplicate metrics should be filtered out + params$metric <- as.list(unique(unlist(params$metric))) + return(params) } diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index d4f62c2b2207..987b04ec1a0a 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -45,9 +45,41 @@ may allow you to pass other types of data like \code{matrix} and then separately \item{obj}{objective function, can be character or custom objective function. Examples include \code{regression}, \code{regression_l1}, \code{huber}, - \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} - -\item{eval}{evaluation function, can be (list of) character or custom eval function} +\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} + +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. + } + }} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -99,6 +131,23 @@ a trained model \code{lgb.CVBooster}. \description{ Cross validation logic used by LightGBM } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + \examples{ \dontrun{ data(agaricus.train, package = "lightgbm") diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 132e6aa82790..199644458bf7 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -38,7 +38,39 @@ may allow you to pass other types of data like \code{matrix} and then separately \code{regression}, \code{regression_l1}, \code{huber}, \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} -\item{eval}{evaluation function, can be (a list of) character or custom eval function} +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. + } + }} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -82,6 +114,23 @@ a trained booster model \code{lgb.Booster}. \description{ Logic to train with LightGBM } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + \examples{ \dontrun{ data(agaricus.train, package = "lightgbm") diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index 501997fd2e2d..ed1922bc2225 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -16,12 +16,50 @@ and one metric. If there's more than one, will check all of them except the training data. Returns the model with (best_iter + early_stopping_rounds). If early stopping occurs, the model will have 'best_iter' field.} +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. + } + }} + \item{eval_freq}{evaluation output frequency, only effect when verbose > 0} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} \item{nrounds}{number of training rounds} +\item{obj}{objective function, can be character or custom objective function. Examples include +\code{regression}, \code{regression_l1}, \code{huber}, +\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} + \item{params}{List of parameters} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -29,4 +67,21 @@ If early stopping occurs, the model will have 'best_iter' field.} \description{ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + \keyword{internal} diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 256a7dc6e8e9..13806fbc05a7 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -74,3 +74,20 @@ List of callback functions that are applied at each iteration.} \description{ Simple interface for training a LightGBM model. } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 96e9bd20db6a..638981e60cdf 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -6,6 +6,59 @@ train <- agaricus.train test <- agaricus.test TOLERANCE <- 1e-6 +set.seed(708L) + +# [description] Every time this function is called, it adds 0.1 +# to an accumulator then returns the current value. +# This is used to mock the situation where an evaluation +# metric increases every iteration +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +assign(x = "INCREASING_METRIC_ACUMULATOR", value = 0.0, envir = .GlobalEnv) + +.increasing_metric <- function(preds, dtrain) { + if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) { + assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) + } + assign( + x = ACCUMULATOR_NAME + , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + 0.1 + , envir = .GlobalEnv + ) + return(list( + name = "increasing_metric" + , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + , higher_better = TRUE + )) +} + +# [description] Evaluation function that always returns the +# same value +CONSTANT_METRIC_VALUE <- 0.2 +.constant_metric <- function(preds, dtrain) { + return(list( + name = "constant_metric" + , value = CONSTANT_METRIC_VALUE + , higher_better = FALSE + )) +} + +# sample datasets to test early stopping +DTRAIN_RANDOM_REGRESSION <- lgb.Dataset( + data = as.matrix(rnorm(100L), ncol = 1L, drop = FALSE) + , label = rnorm(100L) +) +DVALID_RANDOM_REGRESSION <- lgb.Dataset( + data = as.matrix(rnorm(50L), ncol = 1L, drop = FALSE) + , label = rnorm(50L) +) +DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset( + data = as.matrix(rnorm(120L), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 120L, replace = TRUE) +) +DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset( + data = as.matrix(rnorm(37L), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 37L, replace = TRUE) +) test_that("train and predict binary classification", { nrounds <- 10L @@ -707,7 +760,6 @@ test_that("lgb.train() works with early stopping for regression", { params = list( objective = "regression" , metric = "rmse" - , min_data_in_bin = 5L ) , data = dtrain , nrounds = nrounds @@ -730,7 +782,6 @@ test_that("lgb.train() works with early stopping for regression", { params = list( objective = "regression" , metric = "rmse" - , min_data_in_bin = 5L , early_stopping_rounds = early_stopping_rounds ) , data = dtrain @@ -750,6 +801,314 @@ test_that("lgb.train() works with early stopping for regression", { ) }) +test_that("lgb.train() does not stop early if early_stopping_rounds is not given", { + set.seed(708L) + + increasing_metric_starting_value <- get( + ACCUMULATOR_NAME + , envir = .GlobalEnv + ) + nrounds <- 10L + metrics <- list( + .constant_metric + , .increasing_metric + ) + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "None" + ) + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list("valid1" = DVALID_RANDOM_REGRESSION) + , eval = metrics + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) + + # all 10 iterations should have happen, and the best_iter should be + # the first one (based on constant_metric) + best_iter <- 1L + expect_equal(bst$best_iter, best_iter) + + # best_score should be taken from the first metric + expect_equal( + bst$best_score + , bst$record_evals[["valid1"]][["constant_metric"]][["eval"]][[best_iter]] + ) + + # early stopping should not have happened. Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , nrounds + ) + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) + +test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to stop early based on all metrics", { + set.seed(708L) + + early_stopping_rounds <- 3L + param_variations <- list( + list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + ) + , list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = FALSE + ) + ) + + for (params in param_variations) { + + nrounds <- 10L + bst <- lgb.train( + params = params + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) + + # early stopping should have happened, and should have stopped early_stopping_rounds + 1 rounds in + # because constant_metric never improves + # + # the best iteration should be the last one, because increasing_metric was first + # and gets better every iteration + best_iter <- early_stopping_rounds + 1L + expect_equal(bst$best_iter, best_iter) + + # best_score should be taken from "increasing_metric" because it was first + expect_equal( + bst$best_score + , bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]][[best_iter]] + ) + + # early stopping should not have happened. even though increasing_metric kept + # getting better, early stopping should have happened because "constant_metric" + # did not improve + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) + } + +}) + +test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based on only the first metric", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE + ) + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) + + # all 10 iterations should happen, and the best_iter should be the final one + expect_equal(bst$best_iter, nrounds) + + # best_score should be taken from "increasing_metric" + expect_equal( + bst$best_score + , increasing_metric_starting_value + 0.1 * nrounds + ) + + # early stopping should not have happened. Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , nrounds + ) + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) + +test_that("lgb.train() works when a mixture of functions and strings are passed to eval", { + set.seed(708L) + nrounds <- 10L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "None" + ) + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , "rmse" + , .constant_metric + , "l2" + ) + ) + + # all 4 metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("rmse", "l2", "increasing_metric", "constant_metric") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 1.105012) < TOLERANCE) + expect_true(abs(results[["l2"]][["eval"]][[1L]] - 1.221051) < TOLERANCE) + expected_increasing_metric <- increasing_metric_starting_value + 0.1 + expect_true( + abs( + results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric + ) < TOLERANCE + ) + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + +}) + +test_that("lgb.train() works when a list of strings or a character vector is passed to eval", { + + # testing list and character vector, as well as length-1 and length-2 + eval_variations <- list( + c("binary_error", "binary_logloss") + , "binary_logloss" + , list("binary_error", "binary_logloss") + , list("binary_logloss") + ) + + for (eval_variation in eval_variations) { + + set.seed(708L) + nrounds <- 10L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = eval_variation + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = unlist(eval_variation) + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + if ("binary_error" %in% unlist(eval_variation)) { + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE) + } + if ("binary_logloss" %in% unlist(eval_variation)) { + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) + } + } +}) + +test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { + set.seed(708L) + nrounds <- 10L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "binary_error" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = "binary_logloss" + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("binary_error", "binary_logloss") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) +}) + +test_that("lgb.train() works when you give a function for eval", { + set.seed(708L) + nrounds <- 10L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = .constant_metric + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) +}) + test_that("lgb.train() works with early stopping for regression with a metric that should be minimized", { set.seed(708L) trainDF <- data.frame( @@ -1099,6 +1458,172 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com expect_identical(bst$best_score, auc_scores[which.max(auc_scores)]) }) +test_that("lgb.cv() works when you specify both 'metric' and 'eval' with strings", { + set.seed(708L) + nrounds <- 10L + nfolds <- 4L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "binary" + , metric = "binary_error" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , nfold = nfolds + , eval = "binary_logloss" + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid"]] + , expected = c("binary_error", "binary_logloss") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid"]] + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5005654) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.7011232) < TOLERANCE) + + # all boosters should have been created + expect_length(bst$boosters, nfolds) +}) + +test_that("lgb.cv() works when you give a function for eval", { + set.seed(708L) + nrounds <- 10L + nfolds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nfold = nfolds + , nrounds = nrounds + , eval = .constant_metric + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid"]] + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + expect_named(results, "constant_metric") +}) + +test_that("If first_metric_only is TRUE, lgb.cv() decides to stop early based on only the first metric", { + set.seed(708L) + nrounds <- 10L + nfolds <- 5L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE + ) + , data = DTRAIN_RANDOM_REGRESSION + , nfold = nfolds + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_named(bst$record_evals[["valid"]], c("increasing_metric", "constant_metric")) + + # all 10 iterations should happen, and the best_iter should be the final one + expect_equal(bst$best_iter, nrounds) + + # best_score should be taken from "increasing_metric" + # + # this expected value looks magical and confusing, but it's because + # evaluation metrics are averaged over all folds. + # + # consider 5-fold CV with a metric that adds 0.1 to a global accumulator + # each time it's called + # + # * iter 1: [0.1, 0.2, 0.3, 0.4, 0.5] (mean = 0.3) + # * iter 2: [0.6, 0.7, 0.8, 0.9, 1.0] (mean = 1.3) + # * iter 3: [1.1, 1.2, 1.3, 1.4, 1.5] (mean = 1.8) + # + cv_value <- increasing_metric_starting_value + mean(seq_len(nfolds) / 10.0) + (nrounds - 1L) * 0.1 * nfolds + expect_equal(bst$best_score, cv_value) + + # early stopping should not have happened. Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]]) + , nrounds + ) + expect_equal( + length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) + +test_that("early stopping works with lgb.cv()", { + set.seed(708L) + nrounds <- 10L + nfolds <- 5L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE + ) + , data = DTRAIN_RANDOM_REGRESSION + , nfold = nfolds + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .constant_metric + , .increasing_metric + ) + ) + + # only the two functions provided to "eval" should have been evaluated + expect_named(bst$record_evals[["valid"]], c("constant_metric", "increasing_metric")) + + # best_iter should be based on the first metric. Since constant_metric + # never changes, its first iteration was the best oone + expect_equal(bst$best_iter, 1L) + + # best_score should be taken from the first metri + expect_equal(bst$best_score, 0.2) + + # early stopping should have happened, since constant_metric was the first + # one passed to eval and it will not improve over consecutive iterations + # + # note that this test is identical to the previous one, but with the + # order of the eval metrics switched + expect_equal( + length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) + expect_equal( + length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) +}) + +context("interaction constraints") + test_that("lgb.train() throws an informative error if interaction_constraints is not a list", { dtrain <- lgb.Dataset(train$data, label = train$label) params <- list(objective = "regression", interaction_constraints = "[1,2],[3]") diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index af440118f936..ab2c21950d64 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -115,3 +115,12 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided", expect_named(params, "metric") expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss")) }) + +test_that("lgb.check.eval drops duplicate metrics and preserves order", { + params <- lgb.check.eval( + params = list(metric = "l1") + , eval = list("l2", "rmse", "l1", "rmse") + ) + expect_named(params, "metric") + expect_identical(params[["metric"]], list("l1", "l2", "rmse")) +})