diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 94aac5beede6..961715a2e02e 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -61,7 +61,7 @@ #' will be the same as parameter `begin_iteration`, then next one will add +1, and so on). #' #' - iter_feval Evaluation metrics for `evals` that were supplied, either -#' determined by the objective, or by parameter `feval`. +#' determined by the objective, or by parameter `custom_metric`. #' #' For [xgb.train()], this will be a named vector with one entry per element in #' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for @@ -206,8 +206,7 @@ #' data = dm, #' params = xgb.params(objective = "reg:squarederror", nthread = 1), #' nrounds = 5, -#' callbacks = list(ssq_callback), -#' keep_extra_attributes = TRUE +#' callbacks = list(ssq_callback) #' ) #' #' # Result from 'f_after_iter' will be available as an attribute @@ -451,7 +450,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) { #' Callback for logging the evaluation history #' #' @details This callback creates a table with per-iteration evaluation metrics (see parameters -#' `evals` and `feval` in [xgb.train()]). +#' `evals` and `custom_metric` in [xgb.train()]). #' #' Note: in the column names of the final data.table, the dash '-' character is replaced with #' the underscore '_' in order to make the column names more like regular R identifiers. @@ -957,7 +956,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' label = 1 * (iris$Species == "versicolor"), #' nthread = nthread #' ) -#' param <- list( +#' param <- xgb.params( #' booster = "gblinear", #' objective = "reg:logistic", #' eval_metric = "auc", @@ -971,11 +970,10 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' # rate does not break the convergence, but allows us to illustrate the typical pattern of #' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations. #' bst <- xgb.train( -#' param, +#' c(param, list(eta = 1.)), #' dtrain, -#' list(tr = dtrain), +#' evals = list(tr = dtrain), #' nrounds = 200, -#' eta = 1., #' callbacks = list(xgb.cb.gblinear.history()) #' ) #' @@ -986,14 +984,18 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' # With the deterministic coordinate descent updater, it is safer to use higher learning rates. 
#' # Will try the classical componentwise boosting which selects a single best feature per round: #' bst <- xgb.train( -#' param, +#' c( +#' param, +#' xgb.params( +#' eta = 0.8, +#' updater = "coord_descent", +#' feature_selector = "thrifty", +#' top_k = 1 +#' ) +#' ), #' dtrain, -#' list(tr = dtrain), +#' evals = list(tr = dtrain), #' nrounds = 200, -#' eta = 0.8, -#' updater = "coord_descent", -#' feature_selector = "thrifty", -#' top_k = 1, #' callbacks = list(xgb.cb.gblinear.history()) #' ) #' matplot(xgb.gblinear.history(bst), type = "l") @@ -1003,11 +1005,10 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' #' # For xgb.cv: #' bst <- xgb.cv( -#' param, +#' c(param, list(eta = 0.8)), #' dtrain, #' nfold = 5, #' nrounds = 100, -#' eta = 0.8, #' callbacks = list(xgb.cb.gblinear.history()) #' ) #' # coefficients in the CV fold #3 @@ -1017,7 +1018,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' #### Multiclass classification: #' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread) #' -#' param <- list( +#' param <- xgb.params( #' booster = "gblinear", #' objective = "multi:softprob", #' num_class = 3, @@ -1029,11 +1030,10 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' # For the default linear updater 'shotgun' it sometimes is helpful #' # to use smaller eta to reduce instability #' bst <- xgb.train( -#' param, +#' c(param, list(eta = 0.5)), #' dtrain, -#' list(tr = dtrain), +#' evals = list(tr = dtrain), #' nrounds = 50, -#' eta = 0.5, #' callbacks = list(xgb.cb.gblinear.history()) #' ) #' @@ -1044,11 +1044,10 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { #' #' # CV: #' bst <- xgb.cv( -#' param, +#' c(param, list(eta = 0.5)), #' dtrain, #' nfold = 5, #' nrounds = 70, -#' eta = 0.5, #' callbacks = list(xgb.cb.gblinear.history(FALSE)) #' ) #' # 1st fold of 1st class diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 79ede367519c..81e15f7e4421 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -71,21 +71,13 @@ NVL <- function(x, val) { # Merges booster params with whatever is provided in ... # plus runs some checks -check.booster.params <- function(params, ...) { +check.booster.params <- function(params) { if (!identical(class(params), "list")) stop("params must be a list") # in R interface, allow for '.' instead of '_' in parameter names names(params) <- gsub(".", "_", names(params), fixed = TRUE) - # merge parameters from the params and the dots-expansion - dot_params <- list(...) - names(dot_params) <- gsub(".", "_", names(dot_params), fixed = TRUE) - if (length(intersect(names(params), - names(dot_params))) > 0) - stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.") - params <- c(params, dot_params) - # providing a parameter multiple times makes sense only for 'eval_metric' name_freqs <- table(names(params)) multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric') @@ -110,7 +102,6 @@ check.booster.params <- function(params, ...) { } # monotone_constraints parser - if (!is.null(params[['monotone_constraints']]) && typeof(params[['monotone_constraints']]) != "character") { vec2str <- paste(params[['monotone_constraints']], collapse = ',') @@ -144,55 +135,56 @@ check.booster.params <- function(params, ...) { # Performs some checks related to custom objective function. 
-# WARNING: has side-effects and can modify 'params' and 'obj' in its calling frame -check.custom.obj <- function(env = parent.frame()) { - if (!is.null(env$params[['objective']]) && !is.null(env$obj)) - stop("Setting objectives in 'params' and 'obj' at the same time is not allowed") +check.custom.obj <- function(params, objective) { + if (!is.null(params[['objective']]) && !is.null(objective)) + stop("Setting objectives in 'params' and 'objective' at the same time is not allowed") - if (!is.null(env$obj) && typeof(env$obj) != 'closure') - stop("'obj' must be a function") + if (!is.null(objective) && typeof(objective) != 'closure') + stop("'objective' must be a function") # handle the case when custom objective function was provided through params - if (!is.null(env$params[['objective']]) && - typeof(env$params$objective) == 'closure') { - env$obj <- env$params$objective - env$params$objective <- NULL + if (!is.null(params[['objective']]) && + typeof(params$objective) == 'closure') { + objective <- params$objective + params$objective <- NULL } + return(list(params = params, objective = objective)) } # Performs some checks related to custom evaluation function. -# WARNING: has side-effects and can modify 'params' and 'feval' in its calling frame -check.custom.eval <- function(env = parent.frame()) { - if (!is.null(env$params[['eval_metric']]) && !is.null(env$feval)) - stop("Setting evaluation metrics in 'params' and 'feval' at the same time is not allowed") +check.custom.eval <- function(params, custom_metric, maximize, early_stopping_rounds, callbacks) { + if (!is.null(params[['eval_metric']]) && !is.null(custom_metric)) + stop("Setting evaluation metrics in 'params' and 'custom_metric' at the same time is not allowed") - if (!is.null(env$feval) && typeof(env$feval) != 'closure') - stop("'feval' must be a function") + if (!is.null(custom_metric) && typeof(custom_metric) != 'closure') + stop("'custom_metric' must be a function") # handle a situation when custom eval function was provided through params - if (!is.null(env$params[['eval_metric']]) && - typeof(env$params$eval_metric) == 'closure') { - env$feval <- env$params$eval_metric - env$params$eval_metric <- NULL + if (!is.null(params[['eval_metric']]) && + typeof(params$eval_metric) == 'closure') { + custom_metric <- params$eval_metric + params$eval_metric <- NULL } - # require maximize to be set when custom feval and early stopping are used together - if (!is.null(env$feval) && - is.null(env$maximize) && ( - !is.null(env$early_stopping_rounds) || - has.callbacks(env$callbacks, "early_stop"))) + # require maximize to be set when custom metric and early stopping are used together + if (!is.null(custom_metric) && + is.null(maximize) && ( + !is.null(early_stopping_rounds) || + has.callbacks(callbacks, "early_stop"))) stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not") + + return(list(params = params, custom_metric = custom_metric)) } # Update a booster handle for an iteration with dtrain data -xgb.iter.update <- function(bst, dtrain, iter, obj) { +xgb.iter.update <- function(bst, dtrain, iter, objective) { if (!inherits(dtrain, "xgb.DMatrix")) { stop("dtrain must be of xgb.DMatrix class") } handle <- xgb.get.handle(bst) - if (is.null(obj)) { + if (is.null(objective)) { .Call(XGBoosterUpdateOneIter_R, handle, as.integer(iter), dtrain) } else { pred <- predict( @@ -201,12 +193,12 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) { outputmargin = TRUE, training = TRUE ) - gpair <- 
obj(pred, dtrain) - n_samples <- dim(dtrain)[1] + gpair <- objective(pred, dtrain) + n_samples <- dim(dtrain)[1L] grad <- gpair$grad hess <- gpair$hess - if ((is.matrix(grad) && dim(grad)[1] != n_samples) || + if ((is.matrix(grad) && dim(grad)[1L] != n_samples) || (is.vector(grad) && length(grad) != n_samples) || (is.vector(grad) != is.vector(hess))) { warning(paste( @@ -230,14 +222,14 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) { # Evaluate one iteration. # Returns a named vector of evaluation metrics # with the names in a 'datasetname-metricname' format. -xgb.iter.eval <- function(bst, evals, iter, feval) { +xgb.iter.eval <- function(bst, evals, iter, custom_metric) { handle <- xgb.get.handle(bst) if (length(evals) == 0) return(NULL) evnames <- names(evals) - if (is.null(feval)) { + if (is.null(custom_metric)) { msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames)) mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2) res <- structure(as.numeric(mat[2, ]), names = mat[1, ]) @@ -246,7 +238,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) { w <- evals[[j]] ## predict using all trees preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all") - eval_res <- feval(preds, w) + eval_res <- custom_metric(preds, w) out <- eval_res$value names(out) <- paste0(evnames[j], "-", eval_res$metric) out @@ -498,11 +490,13 @@ NULL #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' # Save as a stand-alone file; load it with xgb.load() @@ -535,44 +529,88 @@ NULL NULL # Lookup table for the deprecated parameters bookkeeping -depr_par_lut <- matrix(c( - 'print.every.n', 'print_every_n', - 'early.stop.round', 'early_stopping_rounds', - 'training.data', 'data', - 'with.stats', 'with_stats', - 'numberOfClusters', 'n_clusters', - 'features.keep', 'features_keep', - 'plot.height', 'plot_height', - 'plot.width', 'plot_width', - 'n_first_tree', 'trees', - 'dummy', 'DUMMY', - 'watchlist', 'evals' -), ncol = 2, byrow = TRUE) -colnames(depr_par_lut) <- c('old', 'new') +deprecated_train_params <- list( + 'print.every.n' = 'print_every_n', + 'early.stop.round' = 'early_stopping_rounds', + 'training.data' = 'data', + 'dtrain' = 'data', + 'watchlist' = 'evals', + 'feval' = 'custom_metric' +) +deprecated_dttree_params <- list( + 'n_first_tree' = 'trees' +) +deprecated_plot_params <- list( + 'plot.height' = 'plot_height', + 'plot.width' = 'plot_width' +) +deprecated_multitrees_params <- c( + deprecated_plot_params, + list('features.keep' = 'features_keep') +) +deprecated_dump_params <- list( + 'with.stats' = 'with_stats' +) +deprecated_plottree_params <- c( + deprecated_plot_params, + deprecated_dump_params +) # Checks the dot-parameters for deprecated names # (including partial matching), gives a deprecation warning, # and sets new parameters to the old parameters' values within its parent frame. # WARNING: has side-effects -check.deprecation <- function(..., env = parent.frame()) { - pars <- list(...) 
- # exact and partial matches - all_match <- pmatch(names(pars), depr_par_lut[, 1]) - # indices of matched pars' names - idx_pars <- which(!is.na(all_match)) - if (length(idx_pars) == 0) return() - # indices of matched LUT rows - idx_lut <- all_match[idx_pars] - # which of idx_lut were the exact matches? - ex_match <- depr_par_lut[idx_lut, 1] %in% names(pars) - for (i in seq_along(idx_pars)) { - pars_par <- names(pars)[idx_pars[i]] - old_par <- depr_par_lut[idx_lut[i], 1] - new_par <- depr_par_lut[idx_lut[i], 2] - if (!ex_match[i]) { - warning("'", pars_par, "' was partially matched to '", old_par, "'") +check.deprecation <- function( + deprecated_list, + fn_call, + ..., + env = parent.frame(), + allow_unrecognized = FALSE +) { + params <- list(...) + if (length(params) == 0) { + return(NULL) + } + if (is.null(names(params)) || min(nchar(names(params))) == 0L) { + stop("Passed invalid positional arguments") + } + all_match <- pmatch(names(params), names(deprecated_list)) + # throw error on unrecognized parameters + if (!allow_unrecognized && anyNA(all_match)) { + names_unrecognized <- names(params)[is.na(all_match)] + # make it informative if they match something that goes under 'params' + if (deprecated_list[[1L]] == deprecated_train_params[[1L]]) { + names_params <- formalArgs(xgb.params) + names_params <- c(names_params, gsub("_", ".", names_params, fixed = TRUE)) + names_under_params <- intersect(names_unrecognized, names_params) + if (length(names_under_params)) { + stop( + "Passed invalid function arguments: ", + paste(head(names_under_params), collapse = ", "), + ". These should be passed as a list to argument 'params'." + ) + } + } + # otherwise throw a generic error + stop( + "Passed unrecognized parameters: ", + paste(head(names_unrecognized), collapse = ", ") + ) + } + + matched_params <- deprecated_list[all_match[!is.na(all_match)]] + idx_orig <- seq_along(params)[!is.na(all_match)] + function_args_passed <- names(as.list(fn_call))[-1L] + for (idx in seq_along(matched_params)) { + match_old <- names(matched_params)[[idx]] + match_new <- matched_params[[idx]] + warning( + "Parameter '", match_old, "' has been renamed to '", + match_new, "' and will be removed in a future version." 
+ ) + if (match_new %in% function_args_passed) { + stop("Passed both '", match_new, "' and '", match_old, "'.") } - .Deprecated(new_par, old = old_par, package = 'xgboost') - stop() + env[[match_new]] <- params[[idx_orig[idx]]] } } diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 8cf41067a73d..6ffaa299b500 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -264,11 +264,13 @@ xgb.get.handle <- function(object) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 0.5, -#' nthread = nthread, #' nrounds = 5, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 0.5, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' # use all trees by default @@ -307,13 +309,15 @@ xgb.get.handle <- function(object) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), -#' max_depth = 4, -#' eta = 0.5, -#' nthread = 2, #' nrounds = 10, -#' subsample = 0.5, -#' objective = "multi:softprob", -#' num_class = num_class +#' params = xgb.params( +#' max_depth = 4, +#' eta = 0.5, +#' nthread = 2, +#' subsample = 0.5, +#' objective = "multi:softprob", +#' num_class = num_class +#' ) #' ) #' #' # predict for softmax returns num_class probability numbers per case: @@ -329,13 +333,15 @@ xgb.get.handle <- function(object) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), -#' max_depth = 4, -#' eta = 0.5, -#' nthread = 2, #' nrounds = 10, -#' subsample = 0.5, -#' objective = "multi:softmax", -#' num_class = num_class +#' params = xgb.params( +#' max_depth = 4, +#' eta = 0.5, +#' nthread = 2, +#' subsample = 0.5, +#' objective = "multi:softmax", +#' num_class = num_class +#' ) #' ) #' #' pred <- predict(bst, as.matrix(iris[, -5])) @@ -662,11 +668,13 @@ validate.features <- function(bst, newdata) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' xgb.attr(bst, "my_attribute") <- "my attribute value" @@ -768,11 +776,13 @@ xgb.attributes <- function(object) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' config <- xgb.config(bst) @@ -821,11 +831,13 @@ xgb.config <- function(object) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' xgb.model.parameters(bst) <- list(eta = 0.1) @@ -1080,7 +1092,7 @@ xgb.best_iteration <- function(bst) { #' x <- as.matrix(mtcars[, -1]) #' #' dm <- xgb.DMatrix(data = x, label = y, nthread = 1) -#' params <- list(booster = "gblinear", nthread = 1) +#' params <- xgb.params(booster = "gblinear", nthread = 1) #' model <- xgb.train(data = dm, params = params, nrounds = 2) #' coef(model) #' @export @@ -1164,7 +1176,7 @@ coef.xgb.Booster <- function(object, ...) 
{ #' model <- xgb.train( #' data = dm, #' params = xgb.params(nthread = 1), -#' nround = 3 +#' nrounds = 3 #' ) #' #' # Set an arbitrary attribute kept at the C level @@ -1227,7 +1239,7 @@ xgb.copy.Booster <- function(model) { #' model <- xgb.train( #' params = xgb.params(nthread = 1), #' data = xgb.DMatrix(x, label = y, nthread = 1), -#' nround = 3 +#' nrounds = 3 #' ) #' #' model_shallow_copy <- model @@ -1266,11 +1278,13 @@ xgb.is.same.Booster <- function(obj1, obj2) { #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' attr(bst, "myattr") <- "memo" diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index b20796d750ab..ce06f5c50370 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -688,7 +688,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { #' dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1) #' #' # After construction, can be used as a regular DMatrix -#' params <- list(nthread = 1, objective = "reg:squarederror") +#' params <- xgb.params(nthread = 1, objective = "reg:squarederror") #' model <- xgb.train(data = dm, nrounds = 2, params = params) #' #' # Predictions can also be called on it, and should be the same diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R index 2c4015c5f2de..85d9d560cef0 100644 --- a/R-package/R/xgb.create.features.R +++ b/R-package/R/xgb.create.features.R @@ -42,7 +42,6 @@ #' #' @param model Decision tree boosting model learned on the original data. #' @param data Original data (usually provided as a `dgCMatrix` matrix). -#' @param ... Currently not used. #' #' @return A `dgCMatrix` matrix including both the original data and the new features. #' @@ -53,10 +52,10 @@ #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) #' -#' param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic') +#' param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic', nthread = 1) #' nrounds = 4 #' -#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2) +#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds) #' #' # Model accuracy without new features #' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / @@ -68,12 +67,12 @@ #' #' # learning with new features #' new.dtrain <- xgb.DMatrix( -#' data = new.features.train, label = agaricus.train$label, nthread = 2 +#' data = new.features.train, label = agaricus.train$label #' ) #' new.dtest <- xgb.DMatrix( -#' data = new.features.test, label = agaricus.test$label, nthread = 2 +#' data = new.features.test, label = agaricus.test$label #' ) -#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) +#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds) #' #' # Model accuracy with new features #' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / @@ -84,8 +83,7 @@ #' accuracy.after, "!\n")) #' #' @export -xgb.create.features <- function(model, data, ...) { - check.deprecation(...) 
+xgb.create.features <- function(model, data) { pred_with_leaf <- predict.xgb.Booster(model, data, predleaf = TRUE) cols <- lapply(as.data.frame(pred_with_leaf), factor) cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index f3be5191b4db..9b4095150117 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -8,7 +8,6 @@ #' #' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix` #' or `xgb.ExtMemDMatrix` are not supported here. -#' @param nrounds The max number of iterations. #' @param nfold The original dataset is randomly partitioned into `nfold` equal size subsamples. #' @param prediction A logical value indicating whether to return the test fold predictions #' from each CV model. This parameter engages the [xgb.cb.cv.predict()] callback. @@ -24,10 +23,6 @@ #' - `auc`: Area under curve #' - `aucpr`: Area under PR curve #' - `merror`: Exact matching error used to evaluate multi-class classification -#' @param obj Customized objective function. Returns gradient and second order -#' gradient with given prediction and dtrain. -#' @param feval Customized evaluation function. Returns -#' `list(metric='metric-name', value='metric-value')` with given prediction and dtrain. #' @param stratified Logical flag indicating whether sampling of folds should be stratified #' by the values of outcome labels. For real-valued labels in regression objectives, #' stratification will be done by discretizing the labels into up to 5 buckets beforehand. @@ -51,18 +46,6 @@ #' (the default) all indices not specified in `folds` will be used for training. #' #' This is not supported when `data` has `group` field. -#' @param verbose Logical flag. Should statistics be printed during the process? -#' @param print_every_n Print each nth iteration evaluation messages when `verbose > 0`. -#' Default is 1 which means all messages are printed. This parameter is passed to the -#' [xgb.cb.print.evaluation()] callback. -#' @param early_stopping_rounds If `NULL`, the early stopping function is not triggered. -#' If set to an integer `k`, training with a validation set will stop if the performance -#' doesn't improve for `k` rounds. -#' Setting this parameter engages the [xgb.cb.early.stop()] callback. -#' @param maximize If `feval` and `early_stopping_rounds` are set, -#' then this parameter must be set as well. -#' When it is `TRUE`, it means the larger the evaluation score the better. -#' This parameter is passed to the [xgb.cb.early.stop()] callback. #' @param callbacks A list of callback functions to perform various task during boosting. #' See [xgb.Callback()]. Some of the callbacks are automatically created depending on the #' parameters' values. 
User can provide either existing or their own callback methods in order @@ -110,12 +93,14 @@ #' cv <- xgb.cv( #' data = dtrain, #' nrounds = 3, -#' nthread = 2, +#' params = xgb.params( +#' nthread = 2, +#' max_depth = 3, +#' eta = 1, +#' objective = "binary:logistic" +#' ), #' nfold = 5, -#' metrics = list("rmse","auc"), -#' max_depth = 3, -#' eta = 1, -#' objective = "binary:logistic" +#' metrics = list("rmse","auc") #' ) #' print(cv) #' print(cv, verbose = TRUE) @@ -123,23 +108,27 @@ #' @export xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, prediction = FALSE, showsd = TRUE, metrics = list(), - obj = NULL, feval = NULL, stratified = "auto", folds = NULL, train_folds = NULL, - verbose = TRUE, print_every_n = 1L, + objective = NULL, custom_metric = NULL, stratified = "auto", + folds = NULL, train_folds = NULL, verbose = TRUE, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) { + check.deprecation(deprecated_train_params, match.call(), ...) - check.deprecation(...) stopifnot(inherits(data, "xgb.DMatrix")) if (inherits(data, "xgb.DMatrix") && .Call(XGCheckNullPtr_R, data)) { stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.") } - params <- check.booster.params(params, ...) + params <- check.booster.params(params) # TODO: should we deprecate the redundant 'metrics' parameter? for (m in metrics) params <- c(params, list("eval_metric" = m)) - check.custom.obj() - check.custom.eval() + tmp <- check.custom.obj(params, objective) + params <- tmp$params + objective <- tmp$objective + tmp <- check.custom.eval(params, custom_metric, maximize, early_stopping_rounds, callbacks) + params <- tmp$params + custom_metric <- tmp$custom_metric if (stratified == "auto") { if (is.character(params$objective)) { @@ -258,13 +247,13 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, bst = fd$bst, dtrain = fd$dtrain, iter = iteration - 1, - obj = obj + objective = objective ) xgb.iter.eval( bst = fd$bst, evals = fd$evals, iter = iteration - 1, - feval = feval + custom_metric = custom_metric ) }) msg <- simplify2array(msg) @@ -324,11 +313,13 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, #' cv <- xgb.cv( #' data = xgb.DMatrix(train$data, label = train$label), #' nfold = 5, -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' print(cv) #' print(cv, verbose = TRUE) diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 76271ec515c9..cf601e6388c1 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -16,8 +16,7 @@ #' #' Format 'dot' for a single tree can be passed directly to packages that consume this format #' for graph visualization, such as function `DiagrammeR::grViz()` -#' @param ... Currently not used -#' +#' @inheritParams xgb.train #' @return #' If fname is not provided or set to `NULL` the function will return the model #' as a character vector. Otherwise it will return `TRUE`. 
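# Illustrative sketch, not part of the patch: calling xgb.cv() with the renamed
# 'objective' and 'custom_metric' arguments (formerly 'obj' and 'feval'), as
# introduced by the signature change above. Assumes 'dtrain' is the agaricus
# xgb.DMatrix built in the surrounding examples.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))
  list(grad = preds - labels, hess = preds * (1 - preds))
}
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}
cv <- xgb.cv(
  params = xgb.params(max_depth = 2, eta = 1, nthread = 1),
  data = dtrain,
  nrounds = 3,
  nfold = 5,
  objective = logregobj,      # formerly 'obj'
  custom_metric = evalerror   # formerly 'feval'
)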
@@ -32,11 +31,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' # save the model in file 'xgb.model.dump' @@ -56,7 +57,7 @@ #' @export xgb.dump <- function(model, fname = NULL, fmap = "", with_stats = FALSE, dump_format = c("text", "json", "dot"), ...) { - check.deprecation(...) + check.deprecation(deprecated_dump_params, match.call(), ...) dump_format <- match.arg(dump_format) if (!inherits(model, "xgb.Booster")) stop("model: argument must be of type xgb.Booster") diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 548421d2c83c..c1b45e81bb8c 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -46,11 +46,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' xgb.importance(model = bst) @@ -58,10 +60,13 @@ #' # binomial classification using "gblinear": #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' booster = "gblinear", -#' eta = 0.3, -#' nthread = 1, -#' nrounds = 20,objective = "binary:logistic" +#' nrounds = 20, +#' params = xgb.params( +#' booster = "gblinear", +#' eta = 0.3, +#' nthread = 1, +#' objective = "binary:logistic" +#' ) #' ) #' #' xgb.importance(model = bst) @@ -74,12 +79,14 @@ #' as.matrix(iris[, -5]), #' label = as.numeric(iris$Species) - 1 #' ), -#' max_depth = 3, -#' eta = 0.2, -#' nthread = 2, #' nrounds = nrounds, -#' objective = "multi:softprob", -#' num_class = nclass +#' params = xgb.params( +#' max_depth = 3, +#' eta = 0.2, +#' nthread = 2, +#' objective = "multi:softprob", +#' num_class = nclass +#' ) #' ) #' #' # all classes clumped together: @@ -102,12 +109,14 @@ #' scale(as.matrix(iris[, -5])), #' label = as.numeric(iris$Species) - 1 #' ), -#' booster = "gblinear", -#' eta = 0.2, -#' nthread = 1, #' nrounds = 15, -#' objective = "multi:softprob", -#' num_class = nclass +#' params = xgb.params( +#' booster = "gblinear", +#' eta = 0.2, +#' nthread = 1, +#' objective = "multi:softprob", +#' num_class = nclass +#' ) #' ) #' #' xgb.importance(model = mbst) diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 29ab2dadaf72..b08a308a3dd8 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -32,11 +32,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' fname <- file.path(tempdir(), "xgb.ubj") diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index db2972da7513..12ed705ba8f7 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -13,8 +13,7 @@ #' @param use_int_id A logical flag indicating whether nodes in columns "Yes", "No", and #' "Missing" should be represented as integers (when `TRUE`) or as "Tree-Node" #' character strings (when `FALSE`, default). -#' @param ... Currently not used. 
-#' +#' @inheritParams xgb.train #' @return #' A `data.table` with detailed information about tree nodes. It has the following columns: #' - `Tree`: integer ID of a tree in a model (zero-based index). @@ -44,11 +43,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' # This bst model already has feature_names stored with it, so those would be used when @@ -66,7 +67,7 @@ #' @export xgb.model.dt.tree <- function(model = NULL, text = NULL, trees = NULL, use_int_id = FALSE, ...) { - check.deprecation(...) + check.deprecation(deprecated_dttree_params, match.call(), ...) if (!inherits(model, "xgb.Booster") && !is.character(text)) { stop("Either 'model' must be an object of class xgb.Booster\n", diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R index c8aa92f22f6b..d6ba9c3d2411 100644 --- a/R-package/R/xgb.plot.deepness.R +++ b/R-package/R/xgb.plot.deepness.R @@ -51,12 +51,14 @@ #' ## Change max_depth to a higher number to get a more significant result #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 6, -#' nthread = nthread, #' nrounds = 50, -#' objective = "binary:logistic", -#' subsample = 0.5, -#' min_child_weight = 2 +#' params = xgb.params( +#' max_depth = 6, +#' nthread = nthread, +#' objective = "binary:logistic", +#' subsample = 0.5, +#' min_child_weight = 2 +#' ) #' ) #' #' xgb.plot.deepness(bst) diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R index 11be29a7cb68..750f386dd6f2 100644 --- a/R-package/R/xgb.plot.importance.R +++ b/R-package/R/xgb.plot.importance.R @@ -52,11 +52,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 3, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 3, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst) @@ -74,7 +76,7 @@ #' @export xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL, rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) { - check.deprecation(...) 
+ check.deprecation(deprecated_plot_params, match.call(), ..., allow_unrecognized = TRUE) if (!is.data.table(importance_matrix)) { stop("importance_matrix: must be a data.table") } diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R index 8b4f0eeed037..1c57dd84babd 100644 --- a/R-package/R/xgb.plot.multi.trees.R +++ b/R-package/R/xgb.plot.multi.trees.R @@ -36,13 +36,15 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), -#' max_depth = 15, -#' eta = 1, -#' nthread = nthread, #' nrounds = 30, -#' objective = "binary:logistic", -#' min_child_weight = 50, -#' verbose = 0 +#' verbose = 0, +#' params = xgb.params( +#' max_depth = 15, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic", +#' min_child_weight = 50 +#' ) #' ) #' #' p <- xgb.plot.multi.trees(model = bst, features_keep = 3) @@ -65,7 +67,7 @@ xgb.plot.multi.trees <- function(model, features_keep = 5, plot_width = NULL, pl if (!requireNamespace("DiagrammeR", quietly = TRUE)) { stop("DiagrammeR is required for xgb.plot.multi.trees") } - check.deprecation(...) + check.deprecation(deprecated_multitrees_params, match.call(), ...) tree.matrix <- xgb.model.dt.tree(model = model) # first number of the path represents the tree, then the following numbers are related to the path to follow diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index 4184c6f5ea6a..bb678968db88 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -84,12 +84,14 @@ #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label), #' nrounds = nrounds, -#' eta = 0.1, -#' max_depth = 3, -#' subsample = 0.5, -#' objective = "binary:logistic", -#' nthread = nthread, -#' verbose = 0 +#' verbose = 0, +#' params = xgb.params( +#' eta = 0.1, +#' max_depth = 3, +#' subsample = 0.5, +#' objective = "binary:logistic", +#' nthread = nthread +#' ) #' ) #' #' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none") @@ -109,13 +111,15 @@ #' mbst <- xgb.train( #' data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1), #' nrounds = nrounds, -#' max_depth = 2, -#' eta = 0.3, -#' subsample = 0.5, -#' nthread = nthread, -#' objective = "multi:softprob", -#' num_class = nclass, -#' verbose = 0 +#' verbose = 0, +#' params = xgb.params( +#' max_depth = 2, +#' eta = 0.3, +#' subsample = 0.5, +#' nthread = nthread, +#' objective = "multi:softprob", +#' num_class = nclass +#' ) #' ) #' trees0 <- seq(from = 0, by = nclass, length.out = nrounds) #' col <- rgb(0, 0, 1, 0.5) diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index f5d53bb3432e..f18e17a9e1fe 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -29,7 +29,7 @@ #' splits. When this option is on, the model dump contains two additional #' values: gain is the approximate loss function gain we get in each split; #' cover is the sum of second order gradient in each node. -#' @param ... Currently not used. +#' @inheritParams xgb.train #' @return #' #' Rendered graph object which is an htmlwidget of ' class `grViz`. 
Similar to @@ -41,11 +41,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label), -#' max_depth = 3, -#' eta = 1, -#' nthread = 2, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 3, +#' eta = 1, +#' nthread = 2, +#' objective = "binary:logistic" +#' ) #' ) #' #' # plot the first tree @@ -67,7 +69,7 @@ xgb.plot.tree <- function(model, plot_width = NULL, plot_height = NULL, with_stats = FALSE, ...) { - check.deprecation(...) + check.deprecation(deprecated_plottree_params, match.call(), ...) if (!inherits(model, "xgb.Booster")) { stop("model has to be an object of the class xgb.Booster") } diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 195a58e4881c..855cf964b37b 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -44,11 +44,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' fname <- file.path(tempdir(), "xgb.ubj") diff --git a/R-package/R/xgb.save.raw.R b/R-package/R/xgb.save.raw.R index 197c0980d9ff..3c10cd9f2a5b 100644 --- a/R-package/R/xgb.save.raw.R +++ b/R-package/R/xgb.save.raw.R @@ -23,11 +23,13 @@ #' #' bst <- xgb.train( #' data = xgb.DMatrix(train$data, label = train$label), -#' max_depth = 2, -#' eta = 1, -#' nthread = nthread, #' nrounds = 2, -#' objective = "binary:logistic" +#' params = xgb.params( +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' objective = "binary:logistic" +#' ) #' ) #' #' raw <- xgb.save.raw(bst) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index aee8f37d8f7b..171ed2fc4170 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -8,8 +8,7 @@ #' and the documentation for [xgb.params()] for details. #' #' Should be passed as list with named entries. Parameters that are not specified in this -#' list will use their default values. Alternatively, parameters may be passed directly -#' as function arguments (accepted through `...`). +#' list will use their default values. #' #' A list of named parameters can be created through the function [xgb.params()], which #' accepts all valid parameters as function arguments. @@ -19,28 +18,30 @@ #' as inputs, such as data frames and matrices. #' @param nrounds Max number of boosting iterations. #' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance. -#' Metrics specified in either `eval_metric` or `feval` will be computed for each -#' of these datasets during each boosting iteration, and stored in the end as a field named -#' `evaluation_log` in the resulting object. When either `verbose>=1` or -#' [xgb.cb.print.evaluation()] callback is engaged, the performance results are continuously -#' printed out during the training. +#' Metrics specified in either `eval_metric` (under params) or `custom_metric` (function +#' argument here) will be computed for each of these datasets during each boosting iteration, +#' and stored in the end as a field named `evaluation_log` in the resulting object. +#' +#' When either `verbose>=1` or [xgb.cb.print.evaluation()] callback is engaged, the performance +#' results are continuously printed out during the training. 
+#' #' E.g., specifying `evals=list(validation1=mat1, validation2=mat2)` allows to track -#' the performance of each round's model on mat1 and mat2. -#' @param obj Customized objective function. Should take two arguments: the first one will be the +#' the performance of each round's model on `mat1` and `mat2`. +#' @param objective Customized objective function. Should take two arguments: the first one will be the #' current predictions (either a numeric vector or matrix depending on the number of targets / classes), #' and the second one will be the `data` DMatrix object that is used for training. #' #' It should return a list with two elements `grad` and `hess` (in that order), as either #' numeric vectors or numeric matrices depending on the number of targets / classes (same #' dimension as the predictions that are passed as first argument). -#' @param feval Customized evaluation function. Just like `obj`, should take two arguments, with -#' the first one being the predictions and the second one the `data` DMatrix. +#' @param custom_metric Customized evaluation function. Just like `objective`, should take two arguments, +#' with the first one being the predictions and the second one the `data` DMatrix. #' #' Should return a list with two elements `metric` (name that will be displayed for this metric, #' should be a string / character), and `value` (the number that the function calculates, should #' be a numeric scalar). #' -#' Note that even if passing `feval`, objectives also have an associated default metric that +#' Note that even if passing `custom_metric`, objectives also have an associated default metric that #' will be evaluated in addition to it. In order to disable the built-in metric, one can pass #' parameter `disable_default_eval_metric = TRUE`. #' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance. @@ -53,7 +54,7 @@ #' @param early_stopping_rounds If `NULL`, the early stopping function is not triggered. #' If set to an integer `k`, training with a validation set will stop if the performance #' doesn't improve for `k` rounds. Setting this parameter engages the [xgb.cb.early.stop()] callback. -#' @param maximize If `feval` and `early_stopping_rounds` are set, then this parameter must be set as well. +#' @param maximize If `custom_metric` and `early_stopping_rounds` are set, then this parameter must be set as well. #' When it is `TRUE`, it means the larger the evaluation score the better. #' This parameter is passed to the [xgb.cb.early.stop()] callback. #' @param save_period When not `NULL`, model is saved to disk after every `save_period` rounds. @@ -71,7 +72,13 @@ #' such as an evaluation log (a `data.table` object) - be aware that these objects are kept #' as R attributes, and thus do not get saved when using XGBoost's own serializaters like #' [xgb.save()] (but are kept when using R serializers like [saveRDS()]). -#' @param ... Other parameters to pass to `params`. See [xgb.params()] for more details. +#' @param ... Not used. +#' +#' Some arguments are currently deprecated or have been renamed. If a deprecated argument +#' is passed, will throw a warning and use its current equivalent. +#' +#' If some additional argument is passed that is neither a current function argument nor +#' a deprecated argument, an error will be thrown. #' @return An object of class `xgb.Booster`. 
#' @details #' Compared to [xgboost()], the `xgb.train()` interface supports advanced features such as @@ -132,7 +139,7 @@ #' evals <- list(train = dtrain, eval = dtest) #' #' ## A simple xgb.train example: -#' param <- list( +#' param <- xgb.params( #' max_depth = 2, #' eta = 1, #' nthread = nthread, @@ -156,9 +163,9 @@ #' return(list(metric = "error", value = err)) #' } #' -#' # These functions could be used by passing them either: -#' # as 'objective' and 'eval_metric' parameters in the params list: -#' param <- list( +#' # These functions could be used by passing them as 'objective' and +#' # 'eval_metric' parameters in the params list: +#' param <- xgb.params( #' max_depth = 2, #' eta = 1, #' nthread = nthread, @@ -167,21 +174,11 @@ #' ) #' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0) #' -#' # or through the ... arguments: -#' param <- list(max_depth = 2, eta = 1, nthread = nthread) +#' # ... or as dedicated 'objective' and 'custom_metric' parameters of xgb.train: #' bst <- xgb.train( -#' param, -#' dtrain, -#' nrounds = 2, -#' evals = evals, -#' verbose = 0, -#' objective = logregobj, -#' eval_metric = evalerror -#' ) -#' -#' # or as dedicated 'obj' and 'feval' parameters of xgb.train: -#' bst <- xgb.train( -#' param, dtrain, nrounds = 2, evals = evals, obj = logregobj, feval = evalerror +#' within(param, rm("objective", "eval_metric")), +#' dtrain, nrounds = 2, evals = evals, +#' objective = logregobj, custom_metric = evalerror #' ) #' #' @@ -210,17 +207,19 @@ #' ) #' @export xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), - obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L, + objective = NULL, custom_metric = NULL, verbose = 1, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) { + check.deprecation(deprecated_train_params, match.call(), ...) - check.deprecation(...) - - params <- check.booster.params(params, ...) 
- - check.custom.obj() - check.custom.eval() + params <- check.booster.params(params) + tmp <- check.custom.obj(params, objective) + params <- tmp$params + objective <- tmp$objective + tmp <- check.custom.eval(params, custom_metric, maximize, early_stopping_rounds, callbacks) + params <- tmp$params + custom_metric <- tmp$custom_metric # data & evals checks dtrain <- data @@ -325,7 +324,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), bst = bst, dtrain = dtrain, iter = iteration - 1, - obj = obj + objective = objective ) bst_evaluation <- NULL @@ -334,7 +333,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), bst = bst, evals = evals, iter = iteration - 1, - feval = feval + custom_metric = custom_metric ) } diff --git a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd index 4ce043799436..c4e9026d77f3 100644 --- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd +++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd @@ -82,11 +82,13 @@ data(agaricus.train, package = "xgboost") bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) # Save as a stand-alone file; load it with xgb.load() diff --git a/R-package/man/coef.xgb.Booster.Rd b/R-package/man/coef.xgb.Booster.Rd index 295c766e6413..011139804479 100644 --- a/R-package/man/coef.xgb.Booster.Rd +++ b/R-package/man/coef.xgb.Booster.Rd @@ -48,7 +48,7 @@ y <- mtcars[, 1] x <- as.matrix(mtcars[, -1]) dm <- xgb.DMatrix(data = x, label = y, nthread = 1) -params <- list(booster = "gblinear", nthread = 1) +params <- xgb.params(booster = "gblinear", nthread = 1) model <- xgb.train(data = dm, params = params, nrounds = 2) coef(model) } diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 345a6437dc31..2ef3f1b4bf61 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -223,11 +223,13 @@ test <- agaricus.test bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 0.5, - nthread = nthread, nrounds = 5, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 0.5, + nthread = nthread, + objective = "binary:logistic" + ) ) # use all trees by default @@ -266,13 +268,15 @@ set.seed(11) bst <- xgb.train( data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), - max_depth = 4, - eta = 0.5, - nthread = 2, nrounds = 10, - subsample = 0.5, - objective = "multi:softprob", - num_class = num_class + params = xgb.params( + max_depth = 4, + eta = 0.5, + nthread = 2, + subsample = 0.5, + objective = "multi:softprob", + num_class = num_class + ) ) # predict for softmax returns num_class probability numbers per case: @@ -288,13 +292,15 @@ set.seed(11) bst <- xgb.train( data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), - max_depth = 4, - eta = 0.5, - nthread = 2, nrounds = 10, - subsample = 0.5, - objective = "multi:softmax", - num_class = num_class + params = xgb.params( + max_depth = 4, + eta = 0.5, + nthread = 2, + subsample = 0.5, + objective = "multi:softmax", + num_class = num_class + ) ) pred <- predict(bst, as.matrix(iris[, -5])) diff --git a/R-package/man/print.xgb.Booster.Rd b/R-package/man/print.xgb.Booster.Rd index a1e1e7f7226b..e797bd60f6d4 100644 --- 
a/R-package/man/print.xgb.Booster.Rd +++ b/R-package/man/print.xgb.Booster.Rd @@ -23,11 +23,13 @@ train <- agaricus.train bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) attr(bst, "myattr") <- "memo" diff --git a/R-package/man/print.xgb.cv.Rd b/R-package/man/print.xgb.cv.Rd index fbc4b9e32151..a8c1d70ef4a7 100644 --- a/R-package/man/print.xgb.cv.Rd +++ b/R-package/man/print.xgb.cv.Rd @@ -27,11 +27,13 @@ train <- agaricus.train cv <- xgb.cv( data = xgb.DMatrix(train$data, label = train$label), nfold = 5, - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) print(cv) print(cv, verbose = TRUE) diff --git a/R-package/man/xgb.Callback.Rd b/R-package/man/xgb.Callback.Rd index 5fd756538889..1d108010798b 100644 --- a/R-package/man/xgb.Callback.Rd +++ b/R-package/man/xgb.Callback.Rd @@ -120,7 +120,7 @@ example by using the early stopping callback \code{\link[=xgb.cb.early.stop]{xgb \item iteration Index of the iteration number that is being executed (first iteration will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on). \item iter_feval Evaluation metrics for \code{evals} that were supplied, either -determined by the objective, or by parameter \code{feval}. +determined by the objective, or by parameter \code{custom_metric}. For \code{\link[=xgb.train]{xgb.train()}}, this will be a named vector with one entry per element in \code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for @@ -222,8 +222,7 @@ model <- xgb.train( data = dm, params = xgb.params(objective = "reg:squarederror", nthread = 1), nrounds = 5, - callbacks = list(ssq_callback), - keep_extra_attributes = TRUE + callbacks = list(ssq_callback) ) # Result from 'f_after_iter' will be available as an attribute diff --git a/R-package/man/xgb.ExtMemDMatrix.Rd b/R-package/man/xgb.ExtMemDMatrix.Rd index a4555f571a76..d5d71ef3c0c2 100644 --- a/R-package/man/xgb.ExtMemDMatrix.Rd +++ b/R-package/man/xgb.ExtMemDMatrix.Rd @@ -108,7 +108,7 @@ cache_prefix <- tempdir() dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1) # After construction, can be used as a regular DMatrix -params <- list(nthread = 1, objective = "reg:squarederror") +params <- xgb.params(nthread = 1, objective = "reg:squarederror") model <- xgb.train(data = dm, nrounds = 2, params = params) # Predictions can also be called on it, and should be the same diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index 4c7356eb72ec..015da9458b9b 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -66,11 +66,13 @@ train <- agaricus.train bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) xgb.attr(bst, "my_attribute") <- "my attribute value" diff --git a/R-package/man/xgb.cb.evaluation.log.Rd b/R-package/man/xgb.cb.evaluation.log.Rd index 037dc7cbc2f4..2fe6289ac29b 100644 --- a/R-package/man/xgb.cb.evaluation.log.Rd +++ b/R-package/man/xgb.cb.evaluation.log.Rd @@ -14,7 +14,7 @@ Callback for logging the evaluation history } 
\details{ This callback creates a table with per-iteration evaluation metrics (see parameters -\code{evals} and \code{feval} in \code{\link[=xgb.train]{xgb.train()}}). +\code{evals} and \code{custom_metric} in \code{\link[=xgb.train]{xgb.train()}}). Note: in the column names of the final data.table, the dash '-' character is replaced with the underscore '_' in order to make the column names more like regular R identifiers. diff --git a/R-package/man/xgb.cb.gblinear.history.Rd b/R-package/man/xgb.cb.gblinear.history.Rd index c2b7709aac62..a5c6cd17a011 100644 --- a/R-package/man/xgb.cb.gblinear.history.Rd +++ b/R-package/man/xgb.cb.gblinear.history.Rd @@ -59,7 +59,7 @@ dtrain <- xgb.DMatrix( label = 1 * (iris$Species == "versicolor"), nthread = nthread ) -param <- list( +param <- xgb.params( booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", @@ -73,11 +73,10 @@ param <- list( # rate does not break the convergence, but allows us to illustrate the typical pattern of # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations. bst <- xgb.train( - param, + c(param, list(eta = 1.)), dtrain, - list(tr = dtrain), + evals = list(tr = dtrain), nrounds = 200, - eta = 1., callbacks = list(xgb.cb.gblinear.history()) ) @@ -88,14 +87,18 @@ matplot(coef_path, type = "l") # With the deterministic coordinate descent updater, it is safer to use higher learning rates. # Will try the classical componentwise boosting which selects a single best feature per round: bst <- xgb.train( - param, + c( + param, + xgb.params( + eta = 0.8, + updater = "coord_descent", + feature_selector = "thrifty", + top_k = 1 + ) + ), dtrain, - list(tr = dtrain), + evals = list(tr = dtrain), nrounds = 200, - eta = 0.8, - updater = "coord_descent", - feature_selector = "thrifty", - top_k = 1, callbacks = list(xgb.cb.gblinear.history()) ) matplot(xgb.gblinear.history(bst), type = "l") @@ -105,11 +108,10 @@ matplot(xgb.gblinear.history(bst), type = "l") # For xgb.cv: bst <- xgb.cv( - param, + c(param, list(eta = 0.8)), dtrain, nfold = 5, nrounds = 100, - eta = 0.8, callbacks = list(xgb.cb.gblinear.history()) ) # coefficients in the CV fold #3 @@ -119,7 +121,7 @@ matplot(xgb.gblinear.history(bst)[[3]], type = "l") #### Multiclass classification: dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread) -param <- list( +param <- xgb.params( booster = "gblinear", objective = "multi:softprob", num_class = 3, @@ -131,11 +133,10 @@ param <- list( # For the default linear updater 'shotgun' it sometimes is helpful # to use smaller eta to reduce instability bst <- xgb.train( - param, + c(param, list(eta = 0.5)), dtrain, - list(tr = dtrain), + evals = list(tr = dtrain), nrounds = 50, - eta = 0.5, callbacks = list(xgb.cb.gblinear.history()) ) @@ -146,11 +147,10 @@ matplot(xgb.gblinear.history(bst, class_index = 2), type = "l") # CV: bst <- xgb.cv( - param, + c(param, list(eta = 0.5)), dtrain, nfold = 5, nrounds = 70, - eta = 0.5, callbacks = list(xgb.cb.gblinear.history(FALSE)) ) # 1st fold of 1st class diff --git a/R-package/man/xgb.config.Rd b/R-package/man/xgb.config.Rd index 5ac223b4d8a8..12c23d2b29dd 100644 --- a/R-package/man/xgb.config.Rd +++ b/R-package/man/xgb.config.Rd @@ -37,11 +37,13 @@ train <- agaricus.train bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = nthread, + objective = 
"binary:logistic" + ) ) config <- xgb.config(bst) diff --git a/R-package/man/xgb.copy.Booster.Rd b/R-package/man/xgb.copy.Booster.Rd index aaf14f3920e0..20e6d0633c11 100644 --- a/R-package/man/xgb.copy.Booster.Rd +++ b/R-package/man/xgb.copy.Booster.Rd @@ -31,7 +31,7 @@ dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, params = xgb.params(nthread = 1), - nround = 3 + nrounds = 3 ) # Set an arbitrary attribute kept at the C level diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd index 282593ebd000..ebb210435b43 100644 --- a/R-package/man/xgb.create.features.Rd +++ b/R-package/man/xgb.create.features.Rd @@ -4,14 +4,12 @@ \alias{xgb.create.features} \title{Create new features from a previously learned model} \usage{ -xgb.create.features(model, data, ...) +xgb.create.features(model, data) } \arguments{ \item{model}{Decision tree boosting model learned on the original data.} \item{data}{Original data (usually provided as a \code{dgCMatrix} matrix).} - -\item{...}{Currently not used.} } \value{ A \code{dgCMatrix} matrix including both the original data and the new features. @@ -64,10 +62,10 @@ data(agaricus.test, package = "xgboost") dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) -param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic') +param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic', nthread = 1) nrounds = 4 -bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2) +bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds) # Model accuracy without new features accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / @@ -79,12 +77,12 @@ new.features.test <- xgb.create.features(model = bst, agaricus.test$data) # learning with new features new.dtrain <- xgb.DMatrix( - data = new.features.train, label = agaricus.train$label, nthread = 2 + data = new.features.train, label = agaricus.train$label ) new.dtest <- xgb.DMatrix( - data = new.features.test, label = agaricus.test$label, nthread = 2 + data = new.features.test, label = agaricus.test$label ) -bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) +bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds) # Model accuracy with new features accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 299e92973555..8c48659847e9 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -12,8 +12,8 @@ xgb.cv( prediction = FALSE, showsd = TRUE, metrics = list(), - obj = NULL, - feval = NULL, + objective = NULL, + custom_metric = NULL, stratified = "auto", folds = NULL, train_folds = NULL, @@ -31,8 +31,7 @@ See the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online doc and the documentation for \code{\link[=xgb.params]{xgb.params()}} for details. Should be passed as list with named entries. Parameters that are not specified in this -list will use their default values. Alternatively, parameters may be passed directly -as function arguments (accepted through \code{...}). +list will use their default values. A list of named parameters can be created through the function \code{\link[=xgb.params]{xgb.params()}}, which accepts all valid parameters as function arguments.} @@ -43,7 +42,7 @@ for model training by the objective. 
Note that only the basic \code{xgb.DMatrix} class is supported - variants such as \code{xgb.QuantileDMatrix} or \code{xgb.ExtMemDMatrix} are not supported here.} -\item{nrounds}{The max number of iterations.} +\item{nrounds}{Max number of boosting iterations.} \item{nfold}{The original dataset is randomly partitioned into \code{nfold} equal size subsamples.} @@ -66,11 +65,24 @@ Possible options are: \item \code{merror}: Exact matching error used to evaluate multi-class classification }} -\item{obj}{Customized objective function. Returns gradient and second order -gradient with given prediction and dtrain.} +\item{objective}{Customized objective function. Should take two arguments: the first one will be the +current predictions (either a numeric vector or matrix depending on the number of targets / classes), +and the second one will be the \code{data} DMatrix object that is used for training. -\item{feval}{Customized evaluation function. Returns -\code{list(metric='metric-name', value='metric-value')} with given prediction and dtrain.} +It should return a list with two elements \code{grad} and \code{hess} (in that order), as either +numeric vectors or numeric matrices depending on the number of targets / classes (same +dimension as the predictions that are passed as first argument).} + +\item{custom_metric}{Customized evaluation function. Just like \code{objective}, should take two arguments, +with the first one being the predictions and the second one the \code{data} DMatrix. + +Should return a list with two elements \code{metric} (name that will be displayed for this metric, +should be a string / character), and \code{value} (the number that the function calculates, should +be a numeric scalar). + +Note that even if passing \code{custom_metric}, objectives also have an associated default metric that +will be evaluated in addition to it. In order to disable the built-in metric, one can pass +parameter \code{disable_default_eval_metric = TRUE}.} \item{stratified}{Logical flag indicating whether sampling of folds should be stratified by the values of outcome labels. For real-valued labels in regression objectives, @@ -98,19 +110,20 @@ the resulting DMatrices.} This is not supported when \code{data} has \code{group} field.} -\item{verbose}{Logical flag. Should statistics be printed during the process?} +\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance. +If 2, some additional information will be printed out. +Note that setting \code{verbose > 0} automatically engages the +\code{xgb.cb.print.evaluation(period=1)} callback function.} -\item{print_every_n}{Print each nth iteration evaluation messages when \code{verbose > 0}. +\item{print_every_n}{Print each nth iteration evaluation messages when \code{verbose>0}. Default is 1 which means all messages are printed. This parameter is passed to the \code{\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback.} \item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered. If set to an integer \code{k}, training with a validation set will stop if the performance -doesn't improve for \code{k} rounds. -Setting this parameter engages the \code{\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.} +doesn't improve for \code{k} rounds. Setting this parameter engages the \code{\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.} -\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set, -then this parameter must be set as well. 
+\item{maximize}{If \code{custom_metric} and \code{early_stopping_rounds} are set, then this parameter must be set as well. When it is \code{TRUE}, it means the larger the evaluation score the better. This parameter is passed to the \code{\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.} @@ -119,7 +132,13 @@ See \code{\link[=xgb.Callback]{xgb.Callback()}}. Some of the callbacks are autom parameters' values. User can provide either existing or their own callback methods in order to customize the training process.} -\item{...}{Other parameters to pass to \code{params}. See \code{\link[=xgb.params]{xgb.params()}} for more details.} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. If a deprecated argument +is passed, will throw a warning and use its current equivalent. + +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ An object of class 'xgb.cv.synchronous' with the following elements: @@ -169,12 +188,14 @@ dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) cv <- xgb.cv( data = dtrain, nrounds = 3, - nthread = 2, + params = xgb.params( + nthread = 2, + max_depth = 3, + eta = 1, + objective = "binary:logistic" + ), nfold = 5, - metrics = list("rmse","auc"), - max_depth = 3, - eta = 1, - objective = "binary:logistic" + metrics = list("rmse","auc") ) print(cv) print(cv, verbose = TRUE) diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 8bd8e5d6c0d6..a3b622947eac 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -33,7 +33,13 @@ cover is the sum of second order gradient in each node.} Format 'dot' for a single tree can be passed directly to packages that consume this format for graph visualization, such as function \code{DiagrammeR::grViz()}} -\item{...}{Currently not used} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. If a deprecated argument +is passed, will throw a warning and use its current equivalent. 
+ +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ If fname is not provided or set to \code{NULL} the function will return the model @@ -52,11 +58,13 @@ test <- agaricus.test bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) # save the model in file 'xgb.model.dump' diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index 76574b9cbf06..f26067d7fef9 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -72,11 +72,13 @@ data(agaricus.train, package = "xgboost") bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) xgb.importance(model = bst) @@ -84,10 +86,13 @@ xgb.importance(model = bst) # binomial classification using "gblinear": bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - booster = "gblinear", - eta = 0.3, - nthread = 1, - nrounds = 20,objective = "binary:logistic" + nrounds = 20, + params = xgb.params( + booster = "gblinear", + eta = 0.3, + nthread = 1, + objective = "binary:logistic" + ) ) xgb.importance(model = bst) @@ -100,12 +105,14 @@ mbst <- xgb.train( as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1 ), - max_depth = 3, - eta = 0.2, - nthread = 2, nrounds = nrounds, - objective = "multi:softprob", - num_class = nclass + params = xgb.params( + max_depth = 3, + eta = 0.2, + nthread = 2, + objective = "multi:softprob", + num_class = nclass + ) ) # all classes clumped together: @@ -128,12 +135,14 @@ mbst <- xgb.train( scale(as.matrix(iris[, -5])), label = as.numeric(iris$Species) - 1 ), - booster = "gblinear", - eta = 0.2, - nthread = 1, nrounds = 15, - objective = "multi:softprob", - num_class = nclass + params = xgb.params( + booster = "gblinear", + eta = 0.2, + nthread = 1, + objective = "multi:softprob", + num_class = nclass + ) ) xgb.importance(model = mbst) diff --git a/R-package/man/xgb.is.same.Booster.Rd b/R-package/man/xgb.is.same.Booster.Rd index ff5b4b0c31ab..9b7e47491c36 100644 --- a/R-package/man/xgb.is.same.Booster.Rd +++ b/R-package/man/xgb.is.same.Booster.Rd @@ -42,7 +42,7 @@ x <- as.matrix(mtcars[, -1]) model <- xgb.train( params = xgb.params(nthread = 1), data = xgb.DMatrix(x, label = y, nthread = 1), - nround = 3 + nrounds = 3 ) model_shallow_copy <- model diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index bb898d6f5bfb..3059d530b017 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -38,11 +38,13 @@ test <- agaricus.test bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = nthread, + objective = "binary:logistic" + ) ) fname <- file.path(tempdir(), "xgb.ubj") diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 97533c883874..f55fc17a4e7b 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -28,7 +28,13 @@ is zero-based (e.g., use 
\code{trees = 0:4} for the first five trees).} "Missing" should be represented as integers (when \code{TRUE}) or as "Tree-Node" character strings (when \code{FALSE}, default).} -\item{...}{Currently not used.} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. If a deprecated argument +is passed, will throw a warning and use its current equivalent. + +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ A \code{data.table} with detailed information about tree nodes. It has the following columns: @@ -64,11 +70,13 @@ data.table::setDTthreads(nthread) bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 2, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = nthread, + objective = "binary:logistic" + ) ) # This bst model already has feature_names stored with it, so those would be used when diff --git a/R-package/man/xgb.model.parameters.Rd b/R-package/man/xgb.model.parameters.Rd index 33537c0514fa..5f7c11a1eb95 100644 --- a/R-package/man/xgb.model.parameters.Rd +++ b/R-package/man/xgb.model.parameters.Rd @@ -36,11 +36,13 @@ train <- agaricus.train bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) xgb.model.parameters(bst) <- list(eta = 0.1) diff --git a/R-package/man/xgb.plot.deepness.Rd b/R-package/man/xgb.plot.deepness.Rd index 536bb98c8436..e8729b7ca9be 100644 --- a/R-package/man/xgb.plot.deepness.Rd +++ b/R-package/man/xgb.plot.deepness.Rd @@ -76,12 +76,14 @@ data.table::setDTthreads(nthread) ## Change max_depth to a higher number to get a more significant result bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 6, - nthread = nthread, nrounds = 50, - objective = "binary:logistic", - subsample = 0.5, - min_child_weight = 2 + params = xgb.params( + max_depth = 6, + nthread = nthread, + objective = "binary:logistic", + subsample = 0.5, + min_child_weight = 2 + ) ) xgb.plot.deepness(bst) diff --git a/R-package/man/xgb.plot.importance.Rd b/R-package/man/xgb.plot.importance.Rd index 6b26bec2a86d..7b9dc40d2450 100644 --- a/R-package/man/xgb.plot.importance.Rd +++ b/R-package/man/xgb.plot.importance.Rd @@ -90,11 +90,13 @@ data.table::setDTthreads(nthread) bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 3, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 3, + eta = 1, + nthread = nthread, + objective = "binary:logistic" + ) ) importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst) diff --git a/R-package/man/xgb.plot.multi.trees.Rd b/R-package/man/xgb.plot.multi.trees.Rd index df72ee452ee6..989096e60e42 100644 --- a/R-package/man/xgb.plot.multi.trees.Rd +++ b/R-package/man/xgb.plot.multi.trees.Rd @@ -26,7 +26,13 @@ The values are passed to \code{DiagrammeR::render_graph()}.} \item{render}{Should the graph be rendered or not? The default is \code{TRUE}.} -\item{...}{Currently not used.} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. 
If a deprecated argument +is passed, will throw a warning and use its current equivalent. + +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ Rendered graph object which is an htmlwidget of ' class \code{grViz}. Similar to @@ -64,13 +70,15 @@ data.table::setDTthreads(nthread) bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label), - max_depth = 15, - eta = 1, - nthread = nthread, nrounds = 30, - objective = "binary:logistic", - min_child_weight = 50, - verbose = 0 + verbose = 0, + params = xgb.params( + max_depth = 15, + eta = 1, + nthread = nthread, + objective = "binary:logistic", + min_child_weight = 50 + ) ) p <- xgb.plot.multi.trees(model = bst, features_keep = 3) diff --git a/R-package/man/xgb.plot.shap.Rd b/R-package/man/xgb.plot.shap.Rd index 969a7d103c62..7bdd5ad2bfac 100644 --- a/R-package/man/xgb.plot.shap.Rd +++ b/R-package/man/xgb.plot.shap.Rd @@ -137,12 +137,14 @@ nrounds <- 20 bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, agaricus.train$label), nrounds = nrounds, - eta = 0.1, - max_depth = 3, - subsample = 0.5, - objective = "binary:logistic", - nthread = nthread, - verbose = 0 + verbose = 0, + params = xgb.params( + eta = 0.1, + max_depth = 3, + subsample = 0.5, + objective = "binary:logistic", + nthread = nthread + ) ) xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none") @@ -162,13 +164,15 @@ is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values mbst <- xgb.train( data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1), nrounds = nrounds, - max_depth = 2, - eta = 0.3, - subsample = 0.5, - nthread = nthread, - objective = "multi:softprob", - num_class = nclass, - verbose = 0 + verbose = 0, + params = xgb.params( + max_depth = 2, + eta = 0.3, + subsample = 0.5, + nthread = nthread, + objective = "multi:softprob", + num_class = nclass + ) ) trees0 <- seq(from = 0, by = nclass, length.out = nrounds) col <- rgb(0, 0, 1, 0.5) diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index c58187d0f520..ebcc8603fe98 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -29,7 +29,13 @@ splits. When this option is on, the model dump contains two additional values: gain is the approximate loss function gain we get in each split; cover is the sum of second order gradient in each node.} -\item{...}{Currently not used.} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. If a deprecated argument +is passed, will throw a warning and use its current equivalent. + +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ Rendered graph object which is an htmlwidget of ' class \code{grViz}. 
Similar to @@ -62,11 +68,13 @@ data(agaricus.train, package = "xgboost") bst <- xgb.train( data = xgb.DMatrix(agaricus.train$data, agaricus.train$label), - max_depth = 3, - eta = 1, - nthread = 2, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 3, + eta = 1, + nthread = 2, + objective = "binary:logistic" + ) ) # plot the first tree diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index ec9ab63f717c..738f947405c7 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -52,11 +52,13 @@ test <- agaricus.test bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = nthread, + objective = "binary:logistic" + ) ) fname <- file.path(tempdir(), "xgb.ubj") diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index d5b0d7cc9d6c..24f190a88c30 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -34,11 +34,13 @@ test <- agaricus.test bst <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, - eta = 1, - nthread = nthread, nrounds = 2, - objective = "binary:logistic" + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = nthread, + objective = "binary:logistic" + ) ) raw <- xgb.save.raw(bst) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index c46ebe8fb9f6..c1faa8dc895a 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -9,8 +9,8 @@ xgb.train( data, nrounds, evals = list(), - obj = NULL, - feval = NULL, + objective = NULL, + custom_metric = NULL, verbose = 1, print_every_n = 1L, early_stopping_rounds = NULL, @@ -28,8 +28,7 @@ See the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online doc and the documentation for \code{\link[=xgb.params]{xgb.params()}} for details. Should be passed as list with named entries. Parameters that are not specified in this -list will use their default values. Alternatively, parameters may be passed directly -as function arguments (accepted through \code{...}). +list will use their default values. A list of named parameters can be created through the function \code{\link[=xgb.params]{xgb.params()}}, which accepts all valid parameters as function arguments.} @@ -42,15 +41,17 @@ as inputs, such as data frames and matrices.} \item{nrounds}{Max number of boosting iterations.} \item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance. -Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each -of these datasets during each boosting iteration, and stored in the end as a field named -\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or -\code{\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback is engaged, the performance results are continuously -printed out during the training. +Metrics specified in either \code{eval_metric} (under params) or \code{custom_metric} (function +argument here) will be computed for each of these datasets during each boosting iteration, +and stored in the end as a field named \code{evaluation_log} in the resulting object. + +When either \code{verbose>=1} or \code{\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback is engaged, the performance +results are continuously printed out during the training. 
+ E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track -the performance of each round's model on mat1 and mat2.} +the performance of each round's model on \code{mat1} and \code{mat2}.} -\item{obj}{Customized objective function. Should take two arguments: the first one will be the +\item{objective}{Customized objective function. Should take two arguments: the first one will be the current predictions (either a numeric vector or matrix depending on the number of targets / classes), and the second one will be the \code{data} DMatrix object that is used for training. @@ -58,14 +59,14 @@ It should return a list with two elements \code{grad} and \code{hess} (in that o numeric vectors or numeric matrices depending on the number of targets / classes (same dimension as the predictions that are passed as first argument).} -\item{feval}{Customized evaluation function. Just like \code{obj}, should take two arguments, with -the first one being the predictions and the second one the \code{data} DMatrix. +\item{custom_metric}{Customized evaluation function. Just like \code{objective}, should take two arguments, +with the first one being the predictions and the second one the \code{data} DMatrix. Should return a list with two elements \code{metric} (name that will be displayed for this metric, should be a string / character), and \code{value} (the number that the function calculates, should be a numeric scalar). -Note that even if passing \code{feval}, objectives also have an associated default metric that +Note that even if passing \code{custom_metric}, objectives also have an associated default metric that will be evaluated in addition to it. In order to disable the built-in metric, one can pass parameter \code{disable_default_eval_metric = TRUE}.} @@ -82,7 +83,7 @@ Default is 1 which means all messages are printed. This parameter is passed to t If set to an integer \code{k}, training with a validation set will stop if the performance doesn't improve for \code{k} rounds. Setting this parameter engages the \code{\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.} -\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set, then this parameter must be set as well. +\item{maximize}{If \code{custom_metric} and \code{early_stopping_rounds} are set, then this parameter must be set as well. When it is \code{TRUE}, it means the larger the evaluation score the better. This parameter is passed to the \code{\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.} @@ -105,7 +106,13 @@ such as an evaluation log (a \code{data.table} object) - be aware that these obj as R attributes, and thus do not get saved when using XGBoost's own serializaters like \code{\link[=xgb.save]{xgb.save()}} (but are kept when using R serializers like \code{\link[=saveRDS]{saveRDS()}}).} -\item{...}{Other parameters to pass to \code{params}. See \code{\link[=xgb.params]{xgb.params()}} for more details.} +\item{...}{Not used. + +Some arguments are currently deprecated or have been renamed. If a deprecated argument +is passed, will throw a warning and use its current equivalent. + +If some additional argument is passed that is neither a current function argument nor +a deprecated argument, an error will be thrown.} } \value{ An object of class \code{xgb.Booster}. 
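A minimal sketch of the two-argument interface described above for the renamed 'objective' and 'custom_metric' arguments, assuming the xgb.params() helper introduced in this patch; the helper names (sq_err_obj, rmse_metric) and the use of mtcars as stand-in data are arbitrary choices for illustration and are not part of the patch:

library(xgboost)

# Illustrative custom objective: squared error written against the documented
# (predictions, DMatrix) interface, returning 'grad' and 'hess' numeric vectors
# of the same length as the predictions.
sq_err_obj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  grad <- preds - labels          # first derivative of 0.5 * (pred - label)^2
  hess <- rep(1, length(labels))  # second derivative is constant
  list(grad = grad, hess = hess)
}

# Illustrative custom metric: returns list(metric = <display name>, value = <scalar>).
rmse_metric <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(metric = "rmse", value = sqrt(mean((preds - labels)^2)))
}

# Stand-in data; any xgb.DMatrix would do here.
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)

# disable_default_eval_metric = TRUE suppresses the built-in metric,
# as noted in the parameter description above.
bst <- xgb.train(
  params = xgb.params(nthread = 1, disable_default_eval_metric = TRUE),
  data = dm,
  nrounds = 3,
  evals = list(train = dm),
  objective = sq_err_obj,
  custom_metric = rmse_metric,
  verbose = 0
)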
@@ -169,7 +176,7 @@ dtest <- with( evals <- list(train = dtrain, eval = dtest) ## A simple xgb.train example: -param <- list( +param <- xgb.params( max_depth = 2, eta = 1, nthread = nthread, @@ -193,9 +200,9 @@ evalerror <- function(preds, dtrain) { return(list(metric = "error", value = err)) } -# These functions could be used by passing them either: -# as 'objective' and 'eval_metric' parameters in the params list: -param <- list( +# These functions could be used by passing them as 'objective' and +# 'eval_metric' parameters in the params list: +param <- xgb.params( max_depth = 2, eta = 1, nthread = nthread, @@ -204,21 +211,11 @@ param <- list( ) bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0) -# or through the ... arguments: -param <- list(max_depth = 2, eta = 1, nthread = nthread) -bst <- xgb.train( - param, - dtrain, - nrounds = 2, - evals = evals, - verbose = 0, - objective = logregobj, - eval_metric = evalerror -) - -# or as dedicated 'obj' and 'feval' parameters of xgb.train: +# ... or as dedicated 'objective' and 'custom_metric' parameters of xgb.train: bst <- xgb.train( - param, dtrain, nrounds = 2, evals = evals, obj = logregobj, feval = evalerror + within(param, rm("objective", "eval_metric")), + dtrain, nrounds = 2, evals = evals, + objective = logregobj, custom_metric = evalerror ) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 03a346d02076..f2d6f78d240b 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -17,9 +17,15 @@ test_that("train and predict binary classification", { nrounds <- 2 expect_output( bst <- xgb.train( - data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = nrounds, - objective = "binary:logistic", eval_metric = "error", + data = xgb.DMatrix(train$data, label = train$label), + nrounds = nrounds, + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = n_threads, + objective = "binary:logistic", + eval_metric = "error" + ), evals = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error" @@ -104,14 +110,16 @@ test_that("dart prediction works", { set.seed(1994) booster_by_xgboost <- xgb.train( data = xgb.DMatrix(d, label = y), - max_depth = 2, - booster = "dart", - rate_drop = 0.5, - one_drop = TRUE, - eta = 1, - nthread = n_threads, nrounds = nrounds, - objective = "reg:squarederror" + params = xgb.params( + max_depth = 2, + booster = "dart", + rate_drop = 0.5, + one_drop = TRUE, + eta = 1, + nthread = n_threads, + objective = "reg:squarederror" + ) ) pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL) pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds)) @@ -123,7 +131,7 @@ test_that("dart prediction works", { set.seed(1994) dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads) booster_by_train <- xgb.train( - params = list( + params = xgb.params( booster = "dart", max_depth = 2, eta = 1, @@ -150,8 +158,11 @@ test_that("train and predict softprob", { expect_output( bst <- xgb.train( data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), - max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softprob", num_class = 3, eval_metric = "merror", + nrounds = 5, + params = xgb.params( + max_depth = 3, eta = 0.5, nthread = n_threads, + objective = "multi:softprob", num_class = 3, eval_metric = "merror" + ), evals = list(train = xgb.DMatrix(as.matrix(iris[, 
-5]), label = lb)) ), "train-merror" @@ -186,9 +197,14 @@ test_that("train and predict softprob", { y <- sample.int(10, 100, replace = TRUE) - 1 dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads) booster <- xgb.train( - params = list(tree_method = "hist", nthread = n_threads), - data = dtrain, nrounds = 4, num_class = 10, - objective = "multi:softprob" + params = xgb.params( + objective = "multi:softprob", + num_class = 10, + tree_method = "hist", + nthread = n_threads + ), + data = dtrain, + nrounds = 4 ) predt <- predict(booster, as.matrix(d), strict_shape = FALSE) expect_equal(ncol(predt), 10) @@ -201,8 +217,11 @@ test_that("train and predict softmax", { expect_output( bst <- xgb.train( data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), - max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5, - objective = "multi:softmax", num_class = 3, eval_metric = "merror", + nrounds = 5, + params = xgb.params( + max_depth = 3, eta = 0.5, nthread = n_threads, + objective = "multi:softmax", num_class = 3, eval_metric = "merror" + ), evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ), "train-merror" @@ -222,10 +241,14 @@ test_that("train and predict RF", { lb <- train$label # single iteration bst <- xgb.train( - data = xgb.DMatrix(train$data, label = lb), max_depth = 5, - nthread = n_threads, - nrounds = 1, objective = "binary:logistic", eval_metric = "error", - num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1, + data = xgb.DMatrix(train$data, label = lb), + nrounds = 1, + params = xgb.params( + max_depth = 5, + nthread = n_threads, + objective = "binary:logistic", eval_metric = "error", + num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1 + ), evals = list(train = xgb.DMatrix(train$data, label = lb)) ) expect_equal(xgb.get.num.boosted.rounds(bst), 1) @@ -246,10 +269,14 @@ test_that("train and predict RF with softprob", { set.seed(11) bst <- xgb.train( data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), - max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds, - objective = "multi:softprob", eval_metric = "merror", - num_class = 3, verbose = 0, - num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5, + nrounds = nrounds, + verbose = 0, + params = xgb.params( + max_depth = 3, eta = 0.9, nthread = n_threads, + objective = "multi:softprob", eval_metric = "merror", + num_class = 3, + num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5 + ), evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb)) ) expect_equal(xgb.get.num.boosted.rounds(bst), 15) @@ -268,9 +295,13 @@ test_that("train and predict RF with softprob", { test_that("use of multiple eval metrics works", { expect_output( bst <- xgb.train( - data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = "error", eval_metric = "auc", eval_metric = "logloss", + data = xgb.DMatrix(train$data, label = train$label), + nrounds = 2, + params = list( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic", + eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" + ), evals = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" @@ -280,9 +311,13 @@ test_that("use of multiple eval metrics works", { expect_equal(colnames(attributes(bst)$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss")) expect_output( bst2 <- xgb.train( - data = 
xgb.DMatrix(train$data, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = list("error", "auc", "logloss"), + data = xgb.DMatrix(train$data, label = train$label), + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic", + eval_metric = list("error", "auc", "logloss") + ), evals = list(train = xgb.DMatrix(train$data, label = train$label)) ), "train-error.*train-auc.*train-logloss" @@ -296,18 +331,18 @@ test_that("use of multiple eval metrics works", { test_that("training continuation works", { dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads) evals <- list(train = dtrain) - param <- list( + params <- xgb.params( objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads ) # for the reference, use 4 iterations at once: set.seed(11) - bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0) + bst <- xgb.train(params, dtrain, nrounds = 4, evals = evals, verbose = 0) # first two iterations: set.seed(11) - bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0) + bst1 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0) # continue for two more: - bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1) + bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1) if (!windows_flag && !solaris_flag) { expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2)) } @@ -315,7 +350,7 @@ test_that("training continuation works", { expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2)) expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log) # test continuing from raw model data - bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1)) + bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1)) if (!windows_flag && !solaris_flag) { expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2)) } @@ -323,7 +358,7 @@ test_that("training continuation works", { # test continuing from a model in file fname <- file.path(tempdir(), "xgboost.json") xgb.save(bst1, fname) - bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname) + bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname) if (!windows_flag && !solaris_flag) { expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2)) } @@ -334,9 +369,15 @@ test_that("xgb.cv works", { set.seed(11) expect_output( cv <- xgb.cv( - data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nfold = 5, - eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = "error", verbose = TRUE + data = xgb.DMatrix(train$data, label = train$label), + nfold = 5, + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1., nthread = n_threads, objective = "binary:logistic", + eval_metric = "error" + ), + verbose = TRUE ), "train-error:" ) @@ -355,14 +396,24 @@ test_that("xgb.cv works with stratified folds", { dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads) set.seed(314159) cv <- xgb.cv( - data = dtrain, max_depth = 2, nfold = 5, - eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic", + data = dtrain, + nrounds = 2, + nfold = 5, + params = xgb.params( + max_depth = 2, + eta = 1., nthread = n_threads, objective = 
"binary:logistic" + ), verbose = FALSE, stratified = FALSE ) set.seed(314159) cv2 <- xgb.cv( - data = dtrain, max_depth = 2, nfold = 5, - eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic", + data = dtrain, + nfold = 5, + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1., nthread = n_threads, objective = "binary:logistic" + ), verbose = FALSE, stratified = TRUE ) # Stratified folds should result in a different evaluation logs @@ -373,8 +424,12 @@ test_that("train and predict with non-strict classes", { # standard dense matrix input train_dense <- as.matrix(train$data) bst <- xgb.train( - data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", + data = xgb.DMatrix(train_dense, label = train$label), + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic" + ), verbose = 0 ) pr0 <- predict(bst, train_dense) @@ -384,8 +439,12 @@ test_that("train and predict with non-strict classes", { expect_true(is.matrix(train_dense)) expect_error( bst <- xgb.train( - data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", + data = xgb.DMatrix(train_dense, label = train$label), + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic" + ), verbose = 0 ), regexp = NA @@ -398,8 +457,12 @@ test_that("train and predict with non-strict classes", { expect_true(is.matrix(train_dense)) expect_error( bst <- xgb.train( - data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", + data = xgb.DMatrix(train_dense, label = train$label), + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic" + ), verbose = 0 ), regexp = NA @@ -418,16 +481,16 @@ test_that("max_delta_step works", { agaricus.train$data, label = agaricus.train$label, nthread = n_threads ) evals <- list(train = dtrain) - param <- list( + params <- xgb.params( objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = n_threads, eta = 0.5 ) nrounds <- 5 # model with no restriction on max_delta_step - bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1) + bst1 <- xgb.train(params, dtrain, nrounds, evals = evals, verbose = 1) # model with restricted max_delta_step - bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1) + bst2 <- xgb.train(c(params, list(max_delta_step = 1)), dtrain, nrounds, evals = evals, verbose = 1) # the no-restriction model is expected to have consistently lower loss during the initial iterations expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss)) expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8) @@ -447,13 +510,13 @@ test_that("colsample_bytree works", { evals <- list(train = dtrain, eval = dtest) ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for ## each tree - param <- list( + params <- xgb.params( max_depth = 2, eta = 0, nthread = n_threads, colsample_bytree = 0.01, objective = "binary:logistic", eval_metric = "auc" ) set.seed(2) - bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0) + bst <- 
xgb.train(params, dtrain, nrounds = 100, evals = evals, verbose = 0) xgb.importance(model = bst) # If colsample_bytree works properly, a variety of features should be used # in the 100 trees @@ -462,9 +525,12 @@ test_that("colsample_bytree works", { test_that("Configuration works", { bst <- xgb.train( - data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, - eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic", - eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" + data = xgb.DMatrix(train$data, label = train$label), + nrounds = 2, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = n_threads, objective = "binary:logistic" + ) ) config <- xgb.config(bst) xgb.config(bst) <- config @@ -514,8 +580,11 @@ test_that("strict_shape works", { bst <- xgb.train( data = xgb.DMatrix(X, label = y), - max_depth = 2, nrounds = n_rounds, nthread = n_threads, - objective = "multi:softprob", num_class = 3, eval_metric = "merror" + nrounds = n_rounds, + params = xgb.params( + max_depth = 2, nthread = n_threads, + objective = "multi:softprob", num_class = 3 + ) ) test_strict_shape(bst, X, 3) @@ -528,9 +597,12 @@ test_that("strict_shape works", { y <- agaricus.train$label bst <- xgb.train( - data = xgb.DMatrix(X, label = y), max_depth = 2, nthread = n_threads, - nrounds = n_rounds, objective = "binary:logistic", - eval_metric = "error", eval_metric = "auc", eval_metric = "logloss" + data = xgb.DMatrix(X, label = y), + nrounds = n_rounds, + params = xgb.params( + max_depth = 2, nthread = n_threads, + objective = "binary:logistic" + ) ) test_strict_shape(bst, X, 1) @@ -547,8 +619,12 @@ test_that("'predict' accepts CSR data", { x_csr <- as(x_csc, "RsparseMatrix") x_spv <- as(x_csc, "sparseVector") bst <- xgb.train( - data = xgb.DMatrix(X, label = y), objective = "binary:logistic", - nrounds = 5L, verbose = FALSE, nthread = n_threads, + data = xgb.DMatrix(X, label = y), + nrounds = 5L, verbose = FALSE, + params = xgb.params( + objective = "binary:logistic", + nthread = n_threads + ) ) p_csc <- predict(bst, x_csc) p_csr <- predict(bst, x_csr) @@ -564,7 +640,7 @@ test_that("Quantile regression accepts multiple quantiles", { dm <- xgb.DMatrix(data = x, label = y) model <- xgb.train( data = dm, - params = list( + params = xgb.params( objective = "reg:quantileerror", tree_method = "exact", quantile_alpha = c(0.05, 0.5, 0.95), @@ -591,7 +667,7 @@ test_that("Can use multi-output labels with built-in objectives", { y_mirrored <- cbind(y, -y) dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads) model <- xgb.train( - params = list( + params = xgb.params( tree_method = "hist", multi_strategy = "multi_output_tree", objective = "reg:squarederror", @@ -613,7 +689,7 @@ test_that("Can use multi-output labels with custom objectives", { y_mirrored <- cbind(y, -y) dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads) model <- xgb.train( - params = list( + params = xgb.params( tree_method = "hist", multi_strategy = "multi_output_tree", base_score = 0, @@ -645,11 +721,13 @@ test_that("Can use ranking objectives with either 'qid' or 'group'", { dmat_qid <- xgb.DMatrix(x, label = y, qid = qid) dmat_gr <- xgb.DMatrix(x, label = y, group = gr) - params <- list(tree_method = "hist", - lambdarank_num_pair_per_sample = 8, - objective = "rank:ndcg", - lambdarank_pair_method = "topk", - nthread = n_threads) + params <- xgb.params( + tree_method = "hist", + lambdarank_num_pair_per_sample = 8, + objective = "rank:ndcg", + lambdarank_pair_method = "topk", + nthread = 
n_threads + ) set.seed(123) model_qid <- xgb.train(params, dmat_qid, nrounds = 5) set.seed(123) @@ -667,7 +745,7 @@ test_that("Can predict on data.frame objects", { x_mat <- as.matrix(x_df) dm <- xgb.DMatrix(x_mat, label = y, nthread = n_threads) model <- xgb.train( - params = list( + params = xgb.params( tree_method = "hist", objective = "reg:squarederror", nthread = n_threads @@ -687,7 +765,7 @@ test_that("'base_margin' gives the same result in DMatrix as in inplace_predict" x <- as.matrix(mtcars[, -1]) dm <- xgb.DMatrix(x, label = y, nthread = n_threads) model <- xgb.train( - params = list( + params = xgb.params( tree_method = "hist", objective = "reg:squarederror", nthread = n_threads @@ -714,7 +792,7 @@ test_that("Coefficients from gblinear have the expected shape and names", { dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list( + params = xgb.params( booster = "gblinear", nthread = 1 ), @@ -735,7 +813,7 @@ test_that("Coefficients from gblinear have the expected shape and names", { mm <- model.matrix(~., data = iris[, -5]) model <- xgb.train( data = dm, - params = list( + params = xgb.params( booster = "gblinear", objective = "multi:softprob", num_class = 3, @@ -772,7 +850,7 @@ test_that("Deep copies work as expected", { dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list(nthread = 1), + params = xgb.params(nthread = 1), nrounds = 3 ) @@ -795,7 +873,7 @@ test_that("Pointer comparison works as expected", { y <- mtcars$mpg x <- as.matrix(mtcars[, -1]) model <- xgb.train( - params = list(nthread = 1), + params = xgb.params(nthread = 1), data = xgb.DMatrix(x, label = y, nthread = 1), nrounds = 3 ) @@ -824,7 +902,7 @@ test_that("DMatrix field are set to booster when training", { dm_both <- xgb.DMatrix(x, label = y, feature_names = c("a", "b", "c"), nthread = 1) setinfo(dm_both, "feature_type", c("q", "c", "q")) - params <- list(nthread = 1) + params <- xgb.params(nthread = 1) model_unnamed <- xgb.train(data = dm_unnamed, params = params, nrounds = 3) model_feature_names <- xgb.train(data = dm_feature_names, params = params, nrounds = 3) model_feature_types <- xgb.train(data = dm_feature_types, params = params, nrounds = 3) @@ -853,7 +931,7 @@ test_that("Seed in params override PRNG from R", { agaricus.train$data, label = agaricus.train$label, nthread = 1L ), - params = list( + params = xgb.params( objective = "binary:logistic", max_depth = 3L, subsample = 0.1, @@ -869,7 +947,7 @@ test_that("Seed in params override PRNG from R", { agaricus.train$data, label = agaricus.train$label, nthread = 1L ), - params = list( + params = xgb.params( objective = "binary:logistic", max_depth = 3L, subsample = 0.1, @@ -890,7 +968,7 @@ test_that("Seed in params override PRNG from R", { agaricus.train$data, label = agaricus.train$label, nthread = 1L ), - params = list( + params = xgb.params( objective = "binary:logistic", max_depth = 3L, subsample = 0.1, @@ -913,7 +991,7 @@ test_that("xgb.cv works for AFT", { X <- matrix(c(1, -1, -1, 1, 0, 1, 1, 0), nrow = 4, byrow = TRUE) # 4x2 matrix dtrain <- xgb.DMatrix(X, nthread = n_threads) - params <- list(objective = 'survival:aft', learning_rate = 0.2, max_depth = 2L) + params <- xgb.params(objective = 'survival:aft', learning_rate = 0.2, max_depth = 2L, nthread = n_threads) # data must have bounds expect_error( @@ -921,8 +999,7 @@ test_that("xgb.cv works for AFT", { params = params, data = dtrain, nround = 5L, - nfold = 4L, - nthread = n_threads + nfold = 4L ) ) @@ -933,7 +1010,7 @@ 
test_that("xgb.cv works for AFT", { expect_warning( xgb.cv( params = params, data = dtrain, nround = 5L, nfold = 4L, - nthread = n_threads, stratified = TRUE, verbose = FALSE + stratified = TRUE, verbose = FALSE ) ) @@ -951,9 +1028,10 @@ test_that("xgb.cv works for ranking", { dm <- xgb.DMatrix(x, label = y, group = group) res <- xgb.cv( data = dm, - params = list( + params = xgb.params( objective = "rank:pairwise", - max_depth = 3 + max_depth = 3, + nthread = 1L ), nrounds = 3, nfold = 2, @@ -970,7 +1048,7 @@ test_that("Row names are preserved in outputs", { dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list( + params = xgb.params( objective = "multi:softprob", num_class = 3, max_depth = 2, @@ -990,7 +1068,7 @@ test_that("Row names are preserved in outputs", { dm <- xgb.DMatrix(data = x, label = y) model <- xgb.train( data = dm, - params = list( + params = xgb.params( max_depth = 2, nthread = 1 ), diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 24df4794b30a..5d5bab6428ad 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -24,15 +24,17 @@ evals <- list(train = dtrain, test = dtest) err <- function(label, pr) sum((pr > 0.5) != label) / length(label) -param <- list(objective = "binary:logistic", eval_metric = "error", - max_depth = 2, nthread = n_threads) +params <- xgb.params( + objective = "binary:logistic", eval_metric = "error", + max_depth = 2, nthread = n_threads +) test_that("xgb.cb.print.evaluation works as expected for xgb.train", { logs1 <- capture.output({ model <- xgb.train( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -50,7 +52,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", { logs2 <- capture.output({ model <- xgb.train( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -71,7 +73,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.cv", { logs1 <- capture.output({ model <- xgb.cv( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -89,7 +91,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.cv", { logs2 <- capture.output({ model <- xgb.cv( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -109,7 +111,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.cv", { test_that("xgb.cb.evaluation.log works as expected for xgb.train", { model <- xgb.train( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -129,7 +131,7 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.train", { test_that("xgb.cb.evaluation.log works as expected for xgb.cv", { model <- xgb.cv( data = dtrain, - params = list( + params = xgb.params( objective = "binary:logistic", eval_metric = "auc", max_depth = 2, @@ -150,12 +152,14 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.cv", { }) -param <- list(objective = "binary:logistic", eval_metric = "error", - max_depth = 4, nthread = n_threads) +params <- xgb.params( + objective = "binary:logistic", eval_metric = "error", + max_depth = 4, nthread = n_threads +) test_that("can store evaluation_log without printing", { expect_silent( - bst <- xgb.train(param, 
dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0) + bst <- xgb.train(params, dtrain, nrounds = 10, evals = evals, verbose = 0) ) expect_false(is.null(attributes(bst)$evaluation_log)) expect_false(is.null(attributes(bst)$evaluation_log$train_error)) @@ -165,15 +169,16 @@ test_that("can store evaluation_log without printing", { test_that("xgb.cb.reset.parameters works as expected", { # fixed eta + params <- c(params, list(eta = 0.9)) set.seed(111) - bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0) + bst0 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0) expect_false(is.null(attributes(bst0)$evaluation_log)) expect_false(is.null(attributes(bst0)$evaluation_log$train_error)) # same eta but re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.9, 0.9)) - bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, + bst1 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst1)$evaluation_log$train_error)) expect_equal(attributes(bst0)$evaluation_log$train_error, @@ -182,7 +187,7 @@ test_that("xgb.cb.reset.parameters works as expected", { # same eta but re-set via a function in the callback set.seed(111) my_par <- list(eta = function(itr, itr_end) 0.9) - bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, + bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst2)$evaluation_log$train_error)) expect_equal(attributes(bst0)$evaluation_log$train_error, @@ -191,7 +196,7 @@ test_that("xgb.cb.reset.parameters works as expected", { # different eta re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.6, 0.5)) - bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, + bst3 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst3)$evaluation_log$train_error)) expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error)) @@ -199,18 +204,18 @@ test_that("xgb.cb.reset.parameters works as expected", { # resetting multiple parameters at the same time runs with no error my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8)) expect_error( - bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, + bst4 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) , NA) # NA = no error # CV works as well expect_error( - bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0, + bst4 <- xgb.cv(params, dtrain, nfold = 2, nrounds = 2, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) , NA) # NA = no error # expect no learning with 0 learning rate my_par <- list(eta = c(0., 0.)) - bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, + bstX <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bstX)$evaluation_log$train_error)) er <- unique(attributes(bstX)$evaluation_log$train_error) @@ -223,7 +228,7 @@ test_that("xgb.cb.save.model works as expected", { files <- unname(sapply(files, function(f) file.path(tempdir(), f))) for (f in files) if (file.exists(f)) 
file.remove(f) - bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0, + bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json")) expect_true(file.exists(files[1])) expect_true(file.exists(files[2])) @@ -239,7 +244,7 @@ test_that("xgb.cb.save.model works as expected", { expect_equal(xgb.save.raw(bst), xgb.save.raw(b2)) # save_period = 0 saves the last iteration's model - bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0, + bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, save_period = 0, save_name = file.path(tempdir(), 'xgboost.json')) expect_true(file.exists(files[3])) b2 <- xgb.load(files[3]) @@ -250,9 +255,10 @@ test_that("xgb.cb.save.model works as expected", { }) test_that("early stopping xgb.train works", { + params <- c(params, list(eta = 0.3)) set.seed(11) expect_output( - bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3, + bst <- xgb.train(params, dtrain, nrounds = 20, evals = evals, early_stopping_rounds = 3, maximize = FALSE) , "Stopping. Best iteration") expect_false(is.null(xgb.attr(bst, "best_iteration"))) @@ -266,7 +272,7 @@ test_that("early stopping xgb.train works", { set.seed(11) expect_silent( - bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3, + bst0 <- xgb.train(params, dtrain, nrounds = 20, evals = evals, early_stopping_rounds = 3, maximize = FALSE, verbose = 0) ) expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log) @@ -282,10 +288,22 @@ test_that("early stopping xgb.train works", { test_that("early stopping using a specific metric works", { set.seed(11) expect_output( - bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6, - eval_metric = "logloss", eval_metric = "auc", - callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE, - metric_name = 'test_logloss'))) + bst <- xgb.train( + c( + within(params, rm("eval_metric")), + list( + eta = 0.6, + eval_metric = "logloss", + eval_metric = "auc" + ) + ), + dtrain, + nrounds = 20, + evals = evals, + callbacks = list( + xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE, metric_name = 'test_logloss') + ) + ) , "Stopping. Best iteration") expect_false(is.null(xgb.attr(bst, "best_iteration"))) expect_lt(xgb.attr(bst, "best_iteration"), 19) @@ -308,13 +326,15 @@ test_that("early stopping works with titanic", { dtx <- model.matrix(~ 0 + ., data = titanic[, c("Pclass", "Sex")]) dty <- titanic$Survived - xgboost::xgb.train( + xgb.train( data = xgb.DMatrix(dtx, label = dty), - objective = "binary:logistic", - eval_metric = "auc", + params = xgb.params( + objective = "binary:logistic", + eval_metric = "auc", + nthread = n_threads + ), nrounds = 100, early_stopping_rounds = 3, - nthread = n_threads, evals = list(train = xgb.DMatrix(dtx, label = dty)) ) @@ -324,9 +344,18 @@ test_that("early stopping works with titanic", { test_that("early stopping xgb.cv works", { set.seed(11) expect_output( - cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20, - early_stopping_rounds = 3, maximize = FALSE) - , "Stopping. Best iteration") + { + cv <- xgb.cv( + c(params, list(eta = 0.3)), + dtrain, + nfold = 5, + nrounds = 20, + early_stopping_rounds = 3, + maximize = FALSE + ) + }, + "Stopping. 
Best iteration" + ) expect_false(is.null(cv$early_stop$best_iteration)) expect_lt(cv$early_stop$best_iteration, 19) # the best error is min error: @@ -334,9 +363,10 @@ test_that("early stopping xgb.cv works", { }) test_that("prediction in xgb.cv works", { + params <- c(params, list(eta = 0.5)) set.seed(11) nrounds <- 4 - cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0) + cv <- xgb.cv(params, dtrain, nfold = 5, nrounds = nrounds, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) expect_false(is.null(cv$cv_predict$pred)) expect_length(cv$cv_predict$pred, nrow(train$data)) @@ -346,7 +376,7 @@ test_that("prediction in xgb.cv works", { # save CV models set.seed(11) - cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0, + cvx <- xgb.cv(params, dtrain, nfold = 5, nrounds = nrounds, prediction = TRUE, verbose = 0, callbacks = list(xgb.cb.cv.predict(save_models = TRUE))) expect_equal(cv$evaluation_log, cvx$evaluation_log) expect_length(cvx$cv_predict$models, 5) @@ -355,19 +385,20 @@ test_that("prediction in xgb.cv works", { test_that("prediction in xgb.cv works for gblinear too", { set.seed(11) - p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads) - cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0) + p <- xgb.params(booster = 'gblinear', objective = "reg:logistic", eta = 0.5, nthread = n_threads) + cv <- xgb.cv(p, dtrain, nfold = 5, nrounds = 2, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) expect_false(is.null(cv$cv_predict$pred)) expect_length(cv$cv_predict$pred, nrow(train$data)) }) test_that("prediction in early-stopping xgb.cv works", { + params <- c(params, list(eta = 0.1, base_score = 0.5)) set.seed(11) expect_output( - cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20, + cv <- xgb.cv(params, dtrain, nfold = 5, nrounds = 20, early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE, - prediction = TRUE, base_score = 0.5, verbose = TRUE) + prediction = TRUE, verbose = TRUE) , "Stopping. 
Best iteration") expect_false(is.null(cv$early_stop$best_iteration)) @@ -387,11 +418,22 @@ test_that("prediction in xgb.cv for softprob works", { lb <- as.numeric(iris$Species) - 1 set.seed(11) expect_warning( - cv <- xgb.cv(data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), nfold = 4, - eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads, - subsample = 0.8, gamma = 2, verbose = 0, - prediction = TRUE, objective = "multi:softprob", num_class = 3) - , NA) + { + cv <- xgb.cv( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), + nfold = 4, + nrounds = 5, + params = xgb.params( + objective = "multi:softprob", num_class = 3, + eta = 0.5, max_depth = 3, nthread = n_threads, + subsample = 0.8, gamma = 2 + ), + verbose = 0, + prediction = TRUE + ) + }, + NA + ) expect_false(is.null(cv$cv_predict$pred)) expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3)) expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6) @@ -404,7 +446,7 @@ test_that("prediction in xgb.cv works for multi-quantile", { dm <- xgb.DMatrix(x, label = y, nthread = 1) cv <- xgb.cv( data = dm, - params = list( + params = xgb.params( objective = "reg:quantileerror", quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9), nthread = 1 @@ -424,7 +466,7 @@ test_that("prediction in xgb.cv works for multi-output", { dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1) cv <- xgb.cv( data = dm, - params = list( + params = xgb.params( tree_method = "hist", multi_strategy = "multi_output_tree", objective = "reg:squarederror", @@ -445,7 +487,7 @@ test_that("prediction in xgb.cv works for multi-quantile", { dm <- xgb.DMatrix(x, label = y, nthread = 1) cv <- xgb.cv( data = dm, - params = list( + params = xgb.params( objective = "reg:quantileerror", quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9), nthread = 1 @@ -465,7 +507,7 @@ test_that("prediction in xgb.cv works for multi-output", { dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1) cv <- xgb.cv( data = dm, - params = list( + params = xgb.params( tree_method = "hist", multi_strategy = "multi_output_tree", objective = "reg:squarederror", diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index cf3a347d4d9d..b37d20392224 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -41,7 +41,7 @@ test_that("custom objective works", { }) test_that("custom objective in CV works", { - cv <- xgb.cv(param, dtrain, num_round, nfold = 10, verbose = FALSE) + cv <- xgb.cv(param, dtrain, num_round, nfold = 10, verbose = FALSE, stratified = FALSE) expect_false(is.null(cv$evaluation_log)) expect_equal(dim(cv$evaluation_log), c(2, 5)) expect_lt(cv$evaluation_log[num_round, test_error_mean], 0.03) @@ -89,7 +89,7 @@ test_that("custom objective with multi-class shape", { } param$objective <- fake_softprob param$eval_metric <- fake_merror - bst <- xgb.train(param, dtrain, 1, num_class = n_classes) + bst <- xgb.train(c(param, list(num_class = n_classes)), dtrain, 1) }) softmax <- function(values) { @@ -168,13 +168,29 @@ test_that("custom metric with multi-target passes reshaped data to feval", { num_class = 3L, base_score = 0, disable_default_eval_metric = TRUE, + eval_metric = multinomial.ll, max_depth = 123, seed = 123 ), data = dtrain, nrounds = 2L, evals = list(Train = dtrain), - eval_metric = multinomial.ll, + verbose = 0 + ) + + model <- xgb.train( + params = list( + objective = "multi:softmax", + num_class = 3L, + base_score = 0, + disable_default_eval_metric = TRUE, + 
max_depth = 123, + seed = 123 + ), + data = dtrain, + nrounds = 2L, + evals = list(Train = dtrain), + custom_metric = multinomial.ll, verbose = 0 ) }) diff --git a/R-package/tests/testthat/test_glm.R b/R-package/tests/testthat/test_glm.R index b59de8b62f15..b0212cd53f36 100644 --- a/R-package/tests/testthat/test_glm.R +++ b/R-package/tests/testthat/test_glm.R @@ -21,12 +21,12 @@ test_that("gblinear works", { VERB <- 0 # chatterbox switch param$updater <- 'shotgun' - bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle') + bst <- xgb.train(c(param, list(feature_selector = 'shuffle')), dtrain, n, evals, verbose = VERB) ypred <- predict(bst, dtest) expect_equal(length(getinfo(dtest, 'label')), 1611) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) - bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic', + bst <- xgb.train(c(param, list(feature_selector = 'cyclic')), dtrain, n, evals, verbose = VERB, callbacks = list(xgb.cb.gblinear.history())) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) h <- xgb.gblinear.history(bst) @@ -34,17 +34,17 @@ test_that("gblinear works", { expect_is(h, "matrix") param$updater <- 'coord_descent' - bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic') + bst <- xgb.train(c(param, list(feature_selector = 'cyclic')), dtrain, n, evals, verbose = VERB) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) - bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle') + bst <- xgb.train(c(param, list(feature_selector = 'shuffle')), dtrain, n, evals, verbose = VERB) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) - bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy') + bst <- xgb.train(c(param, list(feature_selector = 'greedy')), dtrain, 2, evals, verbose = VERB) expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL) - bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty', - top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE))) + bst <- xgb.train(c(param, list(feature_selector = 'thrifty', top_k = 50)), dtrain, n, evals, verbose = VERB, + callbacks = list(xgb.cb.gblinear.history(sparse = TRUE))) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) h <- xgb.gblinear.history(bst) expect_equal(dim(h), c(n, ncol(dtrain) + 1)) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index bfffe9e7878c..aabc778eacd7 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -25,15 +25,26 @@ if (isTRUE(VCD_AVAILABLE)) { label <- df[, ifelse(Improved == "Marked", 1, 0)] # binary - bst.Tree <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 9, - eta = 1, nthread = 2, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gbtree", - base_score = 0.5) - - bst.GLM <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), - eta = 1, nthread = 1, nrounds = nrounds, verbose = 0, - objective = "binary:logistic", booster = "gblinear", - base_score = 0.5) + bst.Tree <- xgb.train( + data = xgb.DMatrix(sparse_matrix, label = label), + nrounds = nrounds, verbose = 0, + params = xgb.params( + max_depth = 9, + eta = 1, nthread = 2, + objective = "binary:logistic", booster = "gbtree", + base_score = 0.5 + ) + ) + + bst.GLM <- xgb.train( + data = xgb.DMatrix(sparse_matrix, label = 
label), + nrounds = nrounds, verbose = 0, + params = xgb.params( + eta = 1, nthread = 1, + objective = "binary:logistic", booster = "gblinear", + base_score = 0.5 + ) + ) feature.names <- colnames(sparse_matrix) @@ -45,13 +56,25 @@ if (isTRUE(VCD_AVAILABLE)) { # multiclass mlabel <- as.numeric(iris$Species) - 1 nclass <- 3 -mbst.Tree <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, - max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) - -mbst.GLM <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0, - booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds, - objective = "multi:softprob", num_class = nclass, base_score = 0) +mbst.Tree <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), + verbose = 0, + nrounds = nrounds, + params = xgb.params( + max_depth = 3, eta = 0.5, nthread = 2, + objective = "multi:softprob", num_class = nclass, base_score = 0 + ) +) + +mbst.GLM <- xgb.train( + data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), + verbose = 0, + nrounds = nrounds, + params = xgb.params( + booster = "gblinear", eta = 0.1, nthread = 1, + objective = "multi:softprob", num_class = nclass, base_score = 0 + ) +) test_that("xgb.dump works", { .skip_if_vcd_not_available() @@ -74,9 +97,15 @@ test_that("xgb.dump works for gblinear", { expect_length(xgb.dump(bst.GLM), 14) # also make sure that it works properly for a sparse model where some coefficients # are 0 from setting large L1 regularization: - bst.GLM.sp <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), eta = 1, - nthread = 2, nrounds = 1, - alpha = 2, objective = "binary:logistic", booster = "gblinear") + bst.GLM.sp <- xgb.train( + data = xgb.DMatrix(sparse_matrix, label = label), + nrounds = 1, + params = xgb.params( + eta = 1, + nthread = 2, + alpha = 2, objective = "binary:logistic", booster = "gblinear" + ) + ) d.sp <- xgb.dump(bst.GLM.sp) expect_length(d.sp, 14) expect_gt(sum(d.sp == "0"), 0) @@ -327,7 +356,7 @@ test_that("xgb.importance works with and without feature names", { importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees) importance_from_dump <- function() { - model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees) + model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE) imp <- xgb.model.dt.tree( text = model_text_dump, trees = trees @@ -352,11 +381,13 @@ test_that("xgb.importance works with and without feature names", { expect_equal(importance_from_dump(), importance, tolerance = 1e-6) ## decision stump - m <- xgboost::xgb.train( + m <- xgb.train( data = xgb.DMatrix(as.matrix(data.frame(x = c(0, 1))), label = c(1, 2)), nrounds = 1, - base_score = 0.5, - nthread = 2 + params = xgb.params( + base_score = 0.5, + nthread = 2 + ) ) df <- xgb.model.dt.tree(model = m) expect_equal(df$Feature, "Leaf") @@ -384,9 +415,15 @@ test_that("xgb.importance works with GLM model", { test_that("xgb.model.dt.tree and xgb.importance work with a single split model", { .skip_if_vcd_not_available() - bst1 <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 1, - eta = 1, nthread = 2, nrounds = 1, verbose = 0, - objective = "binary:logistic") + bst1 <- xgb.train( + data = xgb.DMatrix(sparse_matrix, label = label), + nrounds = 1, verbose = 0, + params = xgb.params( + max_depth = 1, + eta = 1, nthread = 2, + objective = "binary:logistic" + ) + ) expect_error(dt <- 
xgb.model.dt.tree(model = bst1), regexp = NA) # no error expect_equal(nrow(dt), 3) expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error @@ -406,7 +443,7 @@ test_that("xgb.plot.importance de-duplicates features", { test_that("xgb.plot.tree works with and without feature names", { .skip_if_vcd_not_available() - expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree.unnamed)) + expect_silent(xgb.plot.tree(model = bst.Tree.unnamed)) expect_silent(xgb.plot.tree(model = bst.Tree)) ## Categorical @@ -425,7 +462,7 @@ test_that("xgb.plot.tree works with and without feature names", { test_that("xgb.plot.multi.trees works with and without feature names", { .skip_if_vcd_not_available() - xgb.plot.multi.trees(model = bst.Tree.unnamed, feature_names = feature.names, features_keep = 3) + xgb.plot.multi.trees(model = bst.Tree.unnamed, features_keep = 3) xgb.plot.multi.trees(model = bst.Tree, features_keep = 3) }) @@ -544,18 +581,76 @@ test_that("xgb.plot.shap.summary ignores categorical features", { }) test_that("check.deprecation works", { - ttt <- function(a = NULL, DUMMY = NULL, ...) { - check.deprecation(...) - as.list((environment())) - } - res <- ttt(a = 1, DUMMY = 2, z = 3) - expect_equal(res, list(a = 1, DUMMY = 2)) - expect_error( - res <- ttt(a = 1, dummy = 22, z = 3), - ) - expect_error( - res <- ttt(a = 1, dumm = 22, z = 3), + data(mtcars) + dm <- xgb.DMatrix(mtcars[, -1L], label = mtcars$mpg) + params <- xgb.params(nthread = 1, max_depth = 2, eval_metric = "rmse") + args_train <- list( + data = dm, + params = params, + nrounds = 10, + verbose = 0 ) + + # with exact name + expect_warning({ + model <- xgb.train( + data = dm, + params = params, + nrounds = 10, + watchlist = list(tr = dm), + verbose = 0 + ) + }, regexp = "watchlist") + expect_true(hasName(attributes(model), "evaluation_log")) + expect_equal(names(attributes(model)$evaluation_log), c("iter", "tr_rmse")) + + # with partial name match + expect_warning({ + model <- xgb.train( + data = dm, + params = params, + nrounds = 10, + watchlis = list(train = dm), + verbose = 0 + ) + }, regexp = "watchlist") + expect_true(hasName(attributes(model), "evaluation_log")) + expect_equal(names(attributes(model)$evaluation_log), c("iter", "train_rmse")) + + # error is thrown if argument cannot be matched + expect_error({ + model <- xgb.train( + data = dm, + params = params, + nrounds = 10, + watchlistt = list(train = dm), + verbose = 0 + ) + }, regexp = "unrecognized") + + # error should suggest to put under 'params' if it goes there + expect_error({ + model <- xgb.train( + data = dm, + nthread = 1, max_depth = 2, eval_metric = "rmse", + nrounds = 10, + watchlistt = list(train = dm), + verbose = 0 + ) + }, regexp = "should be passed as a list to argument 'params'") + + # can take more than one deprecated parameter + expect_warning({ + model <- xgb.train( + training.data = dm, + params = params, + nrounds = 10, + watchlis = list(tr = dm), + verbose = 0 + ) + }, regexp = "training.data") + expect_true(hasName(attributes(model), "evaluation_log")) + expect_equal(names(attributes(model)$evaluation_log), c("iter", "tr_rmse")) }) test_that('convert.labels works', { diff --git a/R-package/tests/testthat/test_interaction_constraints.R b/R-package/tests/testthat/test_interaction_constraints.R index cfffb029ce84..d28f1e618d49 100644 --- a/R-package/tests/testthat/test_interaction_constraints.R +++ b/R-package/tests/testthat/test_interaction_constraints.R @@ -13,9 +13,15 @@ train <- matrix(c(x1, x2, x3), ncol = 3)
test_that("interaction constraints for regression", { # Fit a model that only allows interaction between x1 and x2 - bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 3, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - interaction_constraints = list(c(0, 1))) + bst <- xgb.train( + data = xgb.DMatrix(train, label = y), + nrounds = 100, verbose = 0, + params = xgb.params( + max_depth = 3, + eta = 0.1, nthread = 2, + interaction_constraints = list(c(0, 1)) + ) + ) # Set all observations to have the same x3 values then increment # by the same amount @@ -52,13 +58,20 @@ test_that("interaction constraints scientific representation", { with_inc <- xgb.train( data = dtrain, - tree_method = 'hist', - interaction_constraints = inc, nrounds = 10, - nthread = n_threads + params = xgb.params( + tree_method = 'hist', + interaction_constraints = inc, + nthread = n_threads + ) ) without_inc <- xgb.train( - data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads + data = dtrain, + nrounds = 10, + params = xgb.params( + tree_method = 'hist', + nthread = n_threads + ) ) expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc)) }) diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R index 1380225c79f7..a01adcc532d4 100644 --- a/R-package/tests/testthat/test_interactions.R +++ b/R-package/tests/testthat/test_interactions.R @@ -123,7 +123,7 @@ test_that("multiclass feature interactions work", { dm <- xgb.DMatrix( as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads ) - param <- list( + param <- xgb.params( eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads ) b <- xgb.train(param, dm, 40) @@ -152,10 +152,12 @@ test_that("SHAP single sample works", { test <- agaricus.test booster <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - max_depth = 2, nrounds = 4, - objective = "binary:logistic", - nthread = n_threads + params = xgb.params( + max_depth = 2, + objective = "binary:logistic", + nthread = n_threads + ) ) predt <- predict( diff --git a/R-package/tests/testthat/test_io.R b/R-package/tests/testthat/test_io.R index 36a6d7572f2f..3265ca0197af 100644 --- a/R-package/tests/testthat/test_io.R +++ b/R-package/tests/testthat/test_io.R @@ -9,8 +9,11 @@ test_that("load/save raw works", { nrounds <- 8 booster <- xgb.train( data = xgb.DMatrix(train$data, label = train$label), - nrounds = nrounds, objective = "binary:logistic", - nthread = 2 + nrounds = nrounds, + params = xgb.params( + objective = "binary:logistic", + nthread = 2 + ) ) json_bytes <- xgb.save.raw(booster, raw_format = "json") @@ -34,7 +37,7 @@ test_that("saveRDS preserves C and R attributes", { dm <- xgb.DMatrix(x, label = y, nthread = 1) model <- xgb.train( data = dm, - params = list(nthread = 1, max_depth = 2), + params = xgb.params(nthread = 1, max_depth = 2), nrounds = 5 ) attributes(model)$my_attr <- "qwerty" diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R index 671c02bd0658..70d67ceb7e22 100644 --- a/R-package/tests/testthat/test_monotone.R +++ b/R-package/tests/testthat/test_monotone.R @@ -7,9 +7,15 @@ train <- matrix(x, ncol = 1) test_that("monotone constraints for regression", { - bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 2, - eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, - monotone_constraints = -1) + bst <- xgb.train( + data = xgb.DMatrix(train, label = y), + nrounds = 100, verbose = 0, 
+ params = xgb.params( + max_depth = 2, + eta = 0.1, nthread = 2, + monotone_constraints = -1 + ) + ) pred <- predict(bst, train) diff --git a/R-package/tests/testthat/test_parameter_exposure.R b/R-package/tests/testthat/test_parameter_exposure.R index ed5c28ca5aaa..aacefe3a83ce 100644 --- a/R-package/tests/testthat/test_parameter_exposure.R +++ b/R-package/tests/testthat/test_parameter_exposure.R @@ -10,13 +10,17 @@ dtest <- xgb.DMatrix( agaricus.test$data, label = agaricus.test$label, nthread = 2 ) -bst <- xgb.train(data = dtrain, - max_depth = 2, - eta = 1, - nrounds = 10, - nthread = 1, - verbose = 0, - objective = "binary:logistic") +bst <- xgb.train( + data = dtrain, + verbose = 0, + nrounds = 10, + params = xgb.params( + max_depth = 2, + eta = 1, + nthread = 1, + objective = "binary:logistic" + ) +) test_that("call is exposed to R", { expect_false(is.null(attributes(bst)$call)) diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index adf199c052fb..c045091d044b 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -6,7 +6,8 @@ test_that("Poisson regression works", { data(mtcars) bst <- xgb.train( data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), - objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2 + nrounds = 10, verbose = 0, + params = xgb.params(objective = 'count:poisson', nthread = 2) ) expect_equal(class(bst), "xgb.Booster") pred <- predict(bst, as.matrix(mtcars[, -11])) @@ -21,7 +22,7 @@ test_that("Poisson regression is centered around mean", { x <- matrix(rnorm(m * n), nrow = m) model <- xgb.train( data = xgb.DMatrix(x, label = y), - params = list(objective = "count:poisson", gamma = 1e4), + params = xgb.params(objective = "count:poisson", gamma = 1e4), nrounds = 1 ) model_json <- xgb.save.raw(model, "json") |> rawToChar() |> jsonlite::fromJSON() @@ -41,7 +42,7 @@ test_that("Poisson regression is centered around mean", { w <- y + 1 model_weighted <- xgb.train( data = xgb.DMatrix(x, label = y, weight = w), - params = list(objective = "count:poisson", gamma = 1e4), + params = xgb.params(objective = "count:poisson", gamma = 1e4), nrounds = 1 ) model_json <- xgb.save.raw(model_weighted, "json") |> rawToChar() |> jsonlite::fromJSON() diff --git a/R-package/tests/testthat/test_unicode.R b/R-package/tests/testthat/test_unicode.R index 5c8acc1b0c92..efdb32ac31f3 100644 --- a/R-package/tests/testthat/test_unicode.R +++ b/R-package/tests/testthat/test_unicode.R @@ -8,9 +8,15 @@ set.seed(1994) test_that("Can save and load models with Unicode paths", { nrounds <- 2 - bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, - eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic", - eval_metric = "error") + bst <- xgb.train( + data = xgb.DMatrix(train$data, label = train$label), + nrounds = nrounds, + params = xgb.params( + max_depth = 2, + eta = 1, nthread = 2, + objective = "binary:logistic" + ) + ) tmpdir <- tempdir() lapply(c("모델.json", "がうる・ぐら.json", "类继承.ubj"), function(x) { path <- file.path(tmpdir, x) diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 8347d0ee0a84..ed82e3b8ce49 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -174,7 +174,7 @@ The code below is very usual. 
For more information, you can look at the document ```{r} bst <- xgboost(x = sparse_matrix, y = output_vector, - params = list(max_depth = 4, eta = 1), + max_depth = 4, eta = 1, nthread = 2, nrounds = 10) ``` @@ -302,12 +302,10 @@ test <- agaricus.test bst <- xgboost( x = train$data, y = factor(train$label, levels = c(0, 1)), - params = list( - max_depth = 4, - num_parallel_tree = 1000, - subsample = 0.5, - colsample_bytree = 0.5 - ), + max_depth = 4, + num_parallel_tree = 1000, + subsample = 0.5, + colsample_bytree = 0.5, nrounds = 1, nthread = 2 ) @@ -316,7 +314,7 @@ bst <- xgboost( bst <- xgboost( x = train$data, y = factor(train$label, levels = c(0, 1)), - params = list(max_depth = 4), + max_depth = 4, nrounds = 3, nthread = 2 ) diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index 6d800e63dcd5..444f7ba96fa8 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -157,12 +157,12 @@ as a named list: ```{r} bstTrInterface <- xgb.train( data = xgb.DMatrix(train$data, label = train$label, nthread = 1) - , params = list( + , params = xgb.params( objective = "binary:logistic" , max_depth = 2 , eta = 1 + , nthread = 2 ) - , nthread = 2 , nrounds = 2 ) ``` @@ -343,9 +343,9 @@ For a better understanding of the learning progression, you may want to have som ```{r evals2, message=F, warning=F} bst <- xgb.train( data = dtrain - , max_depth = 2 , params = list( eta = 1 + , max_depth = 2 , nthread = 2 , objective = "binary:logistic" , eval_metric = "error" diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd index e5331b0ff38c..2fbffb3b46b9 100644 --- a/R-package/vignettes/xgboostfromJSON.Rmd +++ b/R-package/vignettes/xgboostfromJSON.Rmd @@ -54,10 +54,12 @@ data <- data.frame(dates = dates, labels = labels) bst <- xgb.train( data = xgb.DMatrix(as.matrix(data$dates), label = labels, missing = NA), - nthread = 2, nrounds = 1, - objective = "binary:logistic", - max_depth = 1 + params = xgb.params( + objective = "binary:logistic", + nthread = 2, + max_depth = 1 + ) ) ```
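Taken together, the hunks above migrate every call that used to pass booster parameters through `...` into the `params = xgb.params(...)` argument, and replace the deprecated `watchlist` argument with `evals`. As a standalone, hedged illustration (not part of the patch; the dataset and parameter values below are arbitrary and chosen only for the example), a call in the post-refactor style looks roughly like this:

```r
# Minimal sketch of the calling convention the patch converges on, assuming the
# refactored interface: booster parameters go through xgb.params() into 'params',
# and evaluation sets are supplied via 'evals' rather than 'watchlist'.
library(xgboost)

data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)

bst <- xgb.train(
  data = dm,
  nrounds = 10,
  verbose = 0,
  params = xgb.params(
    objective = "reg:squarederror",
    max_depth = 2,
    eta = 0.3,
    nthread = 1
  ),
  evals = list(train = dm)
)

pred <- predict(bst, dm)
```

When extra settings need to be layered on top of an existing parameter list, the tests above combine them into the list itself, e.g. `c(param, list(eta = 0.5))`, rather than passing them as separate call arguments.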