Skip to content

Commit

Permalink
[R-package] Add remainder of prediction funtions (#5312)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored Aug 23, 2022
1 parent 01774bb commit 702db13
Show file tree
Hide file tree
Showing 15 changed files with 1,129 additions and 85 deletions.
3 changes: 3 additions & 0 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export(lgb.Dataset.create.valid)
export(lgb.Dataset.save)
export(lgb.Dataset.set.categorical)
export(lgb.Dataset.set.reference)
export(lgb.configure_fast_predict)
export(lgb.convert_with_rules)
export(lgb.cv)
export(lgb.drop_serialized)
Expand All @@ -37,6 +38,8 @@ export(saveRDS.lgb.Booster)
export(set_field)
export(slice)
import(methods)
importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,RsparseMatrix)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgRMatrix)
importClassesFrom(Matrix,dsparseMatrix)
Expand Down
206 changes: 195 additions & 11 deletions R-package/R/lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ Booster <- R6::R6Class(
predictor <- Predictor$new(
modelfile = private$handle
, params = params
, fast_predict_config = private$fast_predict_config
)
return(
predictor$predict(
Expand All @@ -550,6 +551,57 @@ Booster <- R6::R6Class(
return(Predictor$new(modelfile = private$handle))
},

configure_fast_predict = function(csr = FALSE,
start_iteration = NULL,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
params = list()) {

self$restore_handle()
ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)

if (is.null(num_iteration)) {
num_iteration <- -1L
}
if (is.null(start_iteration)) {
start_iteration <- 0L
}

if (!csr) {
fun <- LGBM_BoosterPredictForMatSingleRowFastInit_R
} else {
fun <- LGBM_BoosterPredictForCSRSingleRowFastInit_R
}

fast_handle <- .Call(
fun
, private$handle
, ncols
, rawscore
, predleaf
, predcontrib
, start_iteration
, num_iteration
, lgb.params2str(params = params)
)

private$fast_predict_config <- list(
handle = fast_handle
, csr = as.logical(csr)
, ncols = ncols
, start_iteration = start_iteration
, num_iteration = num_iteration
, rawscore = as.logical(rawscore)
, predleaf = as.logical(predleaf)
, predcontrib = as.logical(predcontrib)
, params = params
)

return(invisible(NULL))
},

# Used for serialization
raw = NULL,

Expand Down Expand Up @@ -601,6 +653,7 @@ Booster <- R6::R6Class(
higher_better_inner_eval = NULL,
set_objective_to_none = FALSE,
train_set_version = 0L,
fast_predict_config = list(),
# Predict data
inner_predict = function(idx) {

Expand Down Expand Up @@ -748,18 +801,15 @@ Booster <- R6::R6Class(
)
)

#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or
#' a character representing a path to a text file (CSV, TSV, or LibSVM)
#' @name lgb_predict_shared_params
#' @param type Type of prediction to output. Allowed types are:\itemize{
#' \item \code{"response"}: will output the predicted score according to the objective function being
#' optimized (depending on the link function that the objective uses), after applying any necessary
#' transformations - for example, for \code{objective="binary"}, it will output class probabilities.
#' \item \code{"class"}: for classification objectives, will output the class with the highest predicted
#' probability. For other objectives, will output the same as "response".
#' probability. For other objectives, will output the same as "response". Note that \code{"class"} is
#' not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function
#' for more details).
#' \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
#' results) from which the "response" number is produced for a given objective function - for example,
#' for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
Expand All @@ -780,12 +830,34 @@ Booster <- R6::R6Class(
#' If None, if the best iteration exists and start_iteration is None or <= 0, the
#' best iteration is used; otherwise, all iterations from start_iteration are used.
#' If <= 0, all iterations from start_iteration are used (no limits).
#' @param header only used for prediction for text file. True if text file has header
#' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
#' the "Predict Parameters" section of the documentation} for a list of parameters and
#' valid values. Where these conflict with the values of keyword arguments to this function,
#' the values in \code{params} take precedence.
NULL

#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @details If the model object has been configured for fast single-row predictions through
#' \link{lgb.configure_fast_predict}, this function will use the prediction parameters
#' that were configured for it - as such, extra prediction parameters should not be passed
#' here, otherwise the configuration will be ignored and the slow route will be taken.
#' @inheritParams lgb_predict_shared_params
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object,
#' or a character representing a path to a text file (CSV, TSV, or LibSVM).
#'
#' For sparse inputs, if predictions are only going to be made for a single row, it will be faster to
#' use CSR format, in which case the data may be passed as either a single-row CSR matrix (class
#' \code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class
#' \code{dsparseVector} from package \code{Matrix}).
#'
#' If single-row predictions are going to be performed frequently, it is recommended to
#' pre-configure the model object for fast single-row sparse predictions through function
#' \link{lgb.configure_fast_predict}.
#' @param header only used for prediction for text file. True if text file has header
#' @param ... ignored
#' @return For prediction types that are meant to always return one output per observation (e.g. when predicting
#' \code{type="response"} or \code{type="raw"} on a binary classification or regression objective), will
Expand Down Expand Up @@ -918,12 +990,124 @@ predict.lgb.Booster <- function(object,
return(pred)
}

#' @title Configure Fast Single-Row Predictions
#' @description Pre-configures a LightGBM model object to produce fast single-row predictions
#' for a given input data type, prediction type, and parameters.
#' @details Calling this function multiple times with different parameters might not override
#' the previous configuration and might trigger undefined behavior.
#'
#' Any saved configuration for fast predictions might be lost after making a single-row
#' prediction of a different type than what was configured (except for types "response" and
#' "class", which can be switched between each other at any time without losing the configuration).
#'
#' In some situations, setting a fast prediction configuration for one type of prediction
#' might cause the prediction function to keep using that configuration for single-row
#' predictions even if the requested type of prediction is different from what was configured.
#'
#' Note that this function will not accept argument \code{type="class"} - for such cases, one
#' can pass \code{type="response"} to this function and then \code{type="class"} to the
#' \code{predict} function - the fast configuration will not be lost or altered if the switch
#' is between "response" and "class".
#'
#' The configuration does not survive de-serializations, so it has to be generated
#' anew in every R process that is going to use it (e.g. if loading a model object
#' through \code{readRDS}, whatever configuration was there previously will be lost).
#'
#' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster}
#' will cause it to ignore the fast-predict configuration and take the slow route instead
#' (but be aware that an existing configuration might not always be overriden by supplying
#' different parameters or prediction type, so make sure to check that the output is what
#' was expected when a prediction is to be made on a single row for something different than
#' what is configured).
#'
#' Note that, if configuring a non-default prediction type (such as leaf indices),
#' then that type must also be passed in the call to \link{predict.lgb.Booster} in
#' order for it to use the configuration. This also applies for \code{start_iteration}
#' and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}.
#'
#' Predictions about feature contributions do not allow a fast route for CSR inputs,
#' and as such, this function will produce an error if passing \code{csr=TRUE} and
#' \code{type = "contrib"} together.
#' @inheritParams lgb_predict_shared_params
#' @param model LighGBM model object (class \code{lgb.Booster}).
#'
#' \bold{The object will be modified in-place}.
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs.
#' If \code{FALSE}, will be assumed that predictions are going to be called on single-row
#' regular R matrices.
#' @return The same \code{model} that was passed as input, invisibly, with the desired
#' configuration stored inside it and available to be used in future calls to
#' \link{predict.lgb.Booster}.
#' @examples
#' \donttest{
#' library(lightgbm)
#' data(mtcars)
#' X <- as.matrix(mtcars[, -1L])
#' y <- mtcars[, 1L]
#' dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
#' params <- list(min_data_in_leaf = 2L)
#' model <- lgb.train(
#' params = params
#' , data = dtrain
#' , obj = "regression"
#' , nrounds = 5L
#' , verbose = -1L
#' )
#' lgb.configure_fast_predict(model)
#'
#' x_single <- X[11L, , drop = FALSE]
#' predict(model, x_single)
#'
#' # Will not use it if the prediction to be made
#' # is different from what was configured
#' predict(model, x_single, type = "leaf")
#' }
#' @export
lgb.configure_fast_predict <- function(model,
csr = FALSE,
start_iteration = NULL,
num_iteration = NULL,
type = "response",
params = list()) {
if (!lgb.is.Booster(x = model)) {
stop("lgb.configure_fast_predict: model should be an ", sQuote("lgb.Booster"))
}
if (type == "class") {
stop("type='class' is not supported for 'lgb.configure_fast_predict'. Use 'response' instead.")
}

rawscore <- FALSE
predleaf <- FALSE
predcontrib <- FALSE
if (type == "raw") {
rawscore <- TRUE
} else if (type == "leaf") {
predleaf <- TRUE
} else if (type == "contrib") {
predcontrib <- TRUE
}

if (csr && predcontrib) {
stop("'lgb.configure_fast_predict' does not support feature contributions for CSR data.")
}
model$configure_fast_predict(
csr = csr
, start_iteration = start_iteration
, num_iteration = num_iteration
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, params = params
)
return(invisible(model))
}

#' @name print.lgb.Booster
#' @title Print method for LightGBM model
#' @description Show summary information about a LightGBM model object (same as \code{summary}).
#' @param x Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `x`, returned as invisible.
#' @return The same input \code{x}, returned as invisible.
#' @export
print.lgb.Booster <- function(x, ...) {
# nolint start
Expand Down Expand Up @@ -972,7 +1156,7 @@ print.lgb.Booster <- function(x, ...) {
#' @description Show summary information about a LightGBM model object (same as \code{print}).
#' @param object Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `object`, returned as invisible.
#' @return The same input \code{object}, returned as invisible.
#' @export
summary.lgb.Booster <- function(object, ...) {
print(object)
Expand All @@ -983,7 +1167,7 @@ summary.lgb.Booster <- function(object, ...) {
#' @description Load LightGBM takes in either a file path or model string.
#' If both are provided, Load will default to loading from file
#' @param filename path of model file
#' @param model_str a str containing the model (as a `character` or `raw` vector)
#' @param model_str a str containing the model (as a \code{character} or \code{raw} vector)
#'
#' @return lgb.Booster
#'
Expand Down
Loading

0 comments on commit 702db13

Please sign in to comment.