Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[R-package] Add remainder of prediction funtions #5312

Merged
merged 39 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
77ea844
remainder of prediction functions
david-cortes Jun 20, 2022
f562002
missing parenthesis
david-cortes Jun 20, 2022
01db0a6
remove expectation on row names for sparse vectors
david-cortes Jun 20, 2022
0ac916b
Update R-package/R/lgb.Booster.R
david-cortes Jun 22, 2022
7b7b6c1
Update R-package/R/lgb.Booster.R
david-cortes Jun 22, 2022
e3ca970
readust spacing
david-cortes Jun 22, 2022
9a466da
change method to private
david-cortes Jun 22, 2022
e8f7925
fix unpassed arguments
david-cortes Jun 22, 2022
ef63f03
update docs
david-cortes Jun 22, 2022
9017435
Update R-package/R/lgb.Predictor.R
david-cortes Aug 16, 2022
8a07d19
Update R-package/R/lgb.Predictor.R
david-cortes Aug 16, 2022
511fde7
Update R-package/R/lgb.Predictor.R
david-cortes Aug 16, 2022
94a3cc3
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
321d35a
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
9535ec7
solve merge conflicts
david-cortes Aug 16, 2022
6042e4e
Merge branch 'Rcsr2' of github.com:david-cortes/lightgbm into Rcsr2
david-cortes Aug 16, 2022
0c35953
Update R-package/R/lgb.Predictor.R
david-cortes Aug 16, 2022
f608756
Update R-package/R/lgb.restore_handle.R
david-cortes Aug 16, 2022
a124284
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
a75e0cd
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
d6f8876
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
b33849d
Update R-package/R/lgb.Booster.R
david-cortes Aug 16, 2022
59b4615
change doc style
david-cortes Aug 16, 2022
76ec594
solve merge conflicts
david-cortes Aug 16, 2022
6dbaf73
add tests for helper function
david-cortes Aug 16, 2022
5a707fc
add donttest around fast predict example
david-cortes Aug 16, 2022
b17ae8d
add more context to warning
david-cortes Aug 16, 2022
dbe57c4
use new prediction type argument in tests
david-cortes Aug 16, 2022
c9b65f1
Update R-package/R/lgb.Predictor.R
david-cortes Aug 16, 2022
c80f9c1
move fast-predict config attributes to private
david-cortes Aug 16, 2022
34ca577
rebuild docs
david-cortes Aug 16, 2022
cfc8a21
move more fast-predict config attributes to private
david-cortes Aug 16, 2022
c32166e
linter
david-cortes Aug 16, 2022
2f7469f
update predlead -> type=leaf
david-cortes Aug 16, 2022
bf03e49
add note about type=class in fast predict
david-cortes Aug 16, 2022
f37481d
Update R-package/src/lightgbm_R.h
david-cortes Aug 19, 2022
8184402
Update R-package/src/lightgbm_R.h
david-cortes Aug 19, 2022
82ba3c7
Update R-package/src/lightgbm_R.h
david-cortes Aug 19, 2022
51ebbb6
Update R-package/src/lightgbm_R.h
david-cortes Aug 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export(lgb.Dataset.create.valid)
export(lgb.Dataset.save)
export(lgb.Dataset.set.categorical)
export(lgb.Dataset.set.reference)
export(lgb.configure_fast_predict)
export(lgb.convert_with_rules)
export(lgb.cv)
export(lgb.drop_serialized)
Expand All @@ -37,6 +38,8 @@ export(saveRDS.lgb.Booster)
export(set_field)
export(slice)
import(methods)
importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,RsparseMatrix)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgRMatrix)
importClassesFrom(Matrix,dsparseMatrix)
Expand Down
206 changes: 195 additions & 11 deletions R-package/R/lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ Booster <- R6::R6Class(
predictor <- Predictor$new(
modelfile = private$handle
, params = params
, fast_predict_config = private$fast_predict_config
)
return(
predictor$predict(
Expand All @@ -550,6 +551,57 @@ Booster <- R6::R6Class(
return(Predictor$new(modelfile = private$handle))
},

# Prepares this booster for fast single-row predictions and caches the
# resulting configuration in private$fast_predict_config.
#
# Arguments:
#   csr             - selects which native "FastInit" routine is called:
#                     the CSR variant when truthy, the dense-matrix
#                     variant otherwise.
#   start_iteration - forwarded to the native routine; NULL becomes 0L.
#   num_iteration   - forwarded to the native routine; NULL becomes -1L.
#   rawscore, predleaf, predcontrib
#                   - prediction-type flags, forwarded as given and stored
#                     as logicals in the cached configuration.
#   params          - list of extra parameters; serialized with
#                     lgb.params2str() for the native call and stored
#                     unmodified in the cached configuration.
#
# Returns NULL, invisibly. Called for its side effect on private state.
configure_fast_predict = function(csr = FALSE,
                                  start_iteration = NULL,
                                  num_iteration = NULL,
                                  rawscore = FALSE,
                                  predleaf = FALSE,
                                  predcontrib = FALSE,
                                  params = list()) {

  # the handle must be usable before any native call is made on it
  self$restore_handle()
  num_features <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)

  # NULL means "not specified": substitute the values passed to the native routine
  if (is.null(num_iteration)) num_iteration <- -1L
  if (is.null(start_iteration)) start_iteration <- 0L

  # pick the initializer that matches the expected input layout
  init_routine <- if (!csr) {
    LGBM_BoosterPredictForMatSingleRowFastInit_R
  } else {
    LGBM_BoosterPredictForCSRSingleRowFastInit_R
  }

  config_handle <- .Call(
    init_routine
    , private$handle
    , num_features
    , rawscore
    , predleaf
    , predcontrib
    , start_iteration
    , num_iteration
    , lgb.params2str(params = params)
  )

  # remember both the native handle and the settings it was created with
  new_config <- list(
    handle = config_handle
    , csr = as.logical(csr)
    , ncols = num_features
    , start_iteration = start_iteration
    , num_iteration = num_iteration
    , rawscore = as.logical(rawscore)
    , predleaf = as.logical(predleaf)
    , predcontrib = as.logical(predcontrib)
    , params = params
  )
  private$fast_predict_config <- new_config

  invisible(NULL)
},

# Used for serialization
raw = NULL,

Expand Down Expand Up @@ -601,6 +653,7 @@ Booster <- R6::R6Class(
higher_better_inner_eval = NULL,
set_objective_to_none = FALSE,
train_set_version = 0L,
fast_predict_config = list(),
# Predict data
inner_predict = function(idx) {

Expand Down Expand Up @@ -748,18 +801,15 @@ Booster <- R6::R6Class(
)
)

#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or
#' a character representing a path to a text file (CSV, TSV, or LibSVM)
#' @name lgb_predict_shared_params
#' @param type Type of prediction to output. Allowed types are:\itemize{
#' \item \code{"response"}: will output the predicted score according to the objective function being
#' optimized (depending on the link function that the objective uses), after applying any necessary
#' transformations - for example, for \code{objective="binary"}, it will output class probabilities.
#' \item \code{"class"}: for classification objectives, will output the class with the highest predicted
#' probability. For other objectives, will output the same as "response".
#' probability. For other objectives, will output the same as "response". Note that \code{"class"} is
#' not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function
#' for more details).
#' \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
#' results) from which the "response" number is produced for a given objective function - for example,
#' for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
Expand All @@ -780,12 +830,34 @@ Booster <- R6::R6Class(
#' If \code{NULL}, if the best iteration exists and \code{start_iteration} is \code{NULL} or <= 0, the
#' best iteration is used; otherwise, all iterations from \code{start_iteration} are used.
#' If <= 0, all iterations from \code{start_iteration} are used (no limits).
#' @param header only used for prediction for text file. True if text file has header
#' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
#' the "Predict Parameters" section of the documentation} for a list of parameters and
#' valid values. Where these conflict with the values of keyword arguments to this function,
#' the values in \code{params} take precedence.
NULL

#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @details If the model object has been configured for fast single-row predictions through
#' \link{lgb.configure_fast_predict}, this function will use the prediction parameters
#' that were configured for it - as such, extra prediction parameters should not be passed
#' here, otherwise the configuration will be ignored and the slow route will be taken.
#' @inheritParams lgb_predict_shared_params
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object,
#' or a character representing a path to a text file (CSV, TSV, or LibSVM).
#'
#' For sparse inputs, if predictions are only going to be made for a single row, it will be faster to
#' use CSR format, in which case the data may be passed as either a single-row CSR matrix (class
#' \code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class
#' \code{dsparseVector} from package \code{Matrix}).
#'
#' If single-row predictions are going to be performed frequently, it is recommended to
#' pre-configure the model object for fast single-row sparse predictions through function
#' \link{lgb.configure_fast_predict}.
#' @param header only used for prediction for text file. True if text file has header
#' @param ... ignored
#' @return For prediction types that are meant to always return one output per observation (e.g. when predicting
#' \code{type="response"} or \code{type="raw"} on a binary classification or regression objective), will
Expand Down Expand Up @@ -918,12 +990,124 @@ predict.lgb.Booster <- function(object,
return(pred)
}

#' @title Configure Fast Single-Row Predictions
#' @description Pre-configures a LightGBM model object to produce fast single-row predictions
#' for a given input data type, prediction type, and parameters.
#' @details Calling this function multiple times with different parameters might not override
#' the previous configuration and might trigger undefined behavior.
#'
#' Any saved configuration for fast predictions might be lost after making a single-row
#' prediction of a different type than what was configured (except for types "response" and
#' "class", which can be switched between each other at any time without losing the configuration).
#'
#' In some situations, setting a fast prediction configuration for one type of prediction
#' might cause the prediction function to keep using that configuration for single-row
#' predictions even if the requested type of prediction is different from what was configured.
#'
#' Note that this function will not accept argument \code{type="class"} - for such cases, one
#' can pass \code{type="response"} to this function and then \code{type="class"} to the
#' \code{predict} function - the fast configuration will not be lost or altered if the switch
#' is between "response" and "class".
#'
#' The configuration does not survive de-serializations, so it has to be generated
#' anew in every R process that is going to use it (e.g. if loading a model object
#' through \code{readRDS}, whatever configuration was there previously will be lost).
#'
#' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster}
#' will cause it to ignore the fast-predict configuration and take the slow route instead
#' (but be aware that an existing configuration might not always be overridden by supplying
#' different parameters or prediction type, so make sure to check that the output is what
#' was expected when a prediction is to be made on a single row for something different than
#' what is configured).
#'
#' Note that, if configuring a non-default prediction type (such as leaf indices),
#' then that type must also be passed in the call to \link{predict.lgb.Booster} in
#' order for it to use the configuration. This also applies for \code{start_iteration}
#' and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}.
#'
#' Predictions about feature contributions do not allow a fast route for CSR inputs,
#' and as such, this function will produce an error if passing \code{csr=TRUE} and
#' \code{type = "contrib"} together.
#' @inheritParams lgb_predict_shared_params
#' @param model LightGBM model object (class \code{lgb.Booster}).
#'
#' \bold{The object will be modified in-place}.
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs.
#' If \code{FALSE}, will be assumed that predictions are going to be called on single-row
#' regular R matrices.
#' @return The same \code{model} that was passed as input, invisibly, with the desired
#' configuration stored inside it and available to be used in future calls to
#' \link{predict.lgb.Booster}.
#' @examples
#' \donttest{
#' library(lightgbm)
#' data(mtcars)
#' X <- as.matrix(mtcars[, -1L])
#' y <- mtcars[, 1L]
#' dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
#' params <- list(min_data_in_leaf = 2L)
#' model <- lgb.train(
#' params = params
#' , data = dtrain
#' , obj = "regression"
#' , nrounds = 5L
#' , verbose = -1L
#' )
#' lgb.configure_fast_predict(model)
#'
#' x_single <- X[11L, , drop = FALSE]
#' predict(model, x_single)
#'
#' # Will not use it if the prediction to be made
#' # is different from what was configured
#' predict(model, x_single, type = "leaf")
#' }
#' @export
lgb.configure_fast_predict <- function(model,
                                       csr = FALSE,
                                       start_iteration = NULL,
                                       num_iteration = NULL,
                                       type = "response",
                                       params = list()) {
  if (!lgb.is.Booster(x = model)) {
    stop("lgb.configure_fast_predict: model should be an ", sQuote("lgb.Booster"))
  }

  # 'type' drives every branch below, so reject anything that is not a single
  # string up front instead of failing later with an opaque 'if' condition error
  if (!is.character(type) || length(type) != 1L || is.na(type)) {
    stop("lgb.configure_fast_predict: 'type' should be a single non-missing string")
  }
  if (type == "class") {
    stop("type='class' is not supported for 'lgb.configure_fast_predict'. Use 'response' instead.")
  }

  # An unrecognized value (e.g. a typo such as "leafs") used to fall through
  # silently and configure a "response" prediction; fail loudly instead.
  supported_types <- c("response", "raw", "leaf", "contrib")
  if (!(type %in% supported_types)) {
    stop(
      "lgb.configure_fast_predict: unknown type '"
      , type
      , "'. Supported types are: "
      , paste0("'", supported_types, "'", collapse = ", ")
    )
  }

  # translate the prediction type into the flags expected by the Booster method
  # ("response" leaves all three flags FALSE)
  rawscore <- type == "raw"
  predleaf <- type == "leaf"
  predcontrib <- type == "contrib"

  if (csr && predcontrib) {
    stop("'lgb.configure_fast_predict' does not support feature contributions for CSR data.")
  }

  # the Booster stores the configuration inside itself (modified in place)
  model$configure_fast_predict(
    csr = csr
    , start_iteration = start_iteration
    , num_iteration = num_iteration
    , rawscore = rawscore
    , predleaf = predleaf
    , predcontrib = predcontrib
    , params = params
  )

  # hand the same object back invisibly so that calls can be chained
  return(invisible(model))
}

#' @name print.lgb.Booster
#' @title Print method for LightGBM model
#' @description Show summary information about a LightGBM model object (same as \code{summary}).
#' @param x Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `x`, returned as invisible.
#' @return The same input \code{x}, returned as invisible.
#' @export
print.lgb.Booster <- function(x, ...) {
# nolint start
Expand Down Expand Up @@ -972,7 +1156,7 @@ print.lgb.Booster <- function(x, ...) {
#' @description Show summary information about a LightGBM model object (same as \code{print}).
#' @param object Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `object`, returned as invisible.
#' @return The same input \code{object}, returned as invisible.
#' @export
summary.lgb.Booster <- function(object, ...) {
print(object)
Expand All @@ -983,7 +1167,7 @@ summary.lgb.Booster <- function(object, ...) {
#' @description Load LightGBM takes in either a file path or model string.
#' If both are provided, Load will default to loading from file
#' @param filename path of model file
#' @param model_str a str containing the model (as a `character` or `raw` vector)
#' @param model_str a str containing the model (as a \code{character} or \code{raw} vector)
#'
#' @return lgb.Booster
#'
Expand Down
Loading