From d1fcef8224ff8bec2c39aa496bc0d91d32de9732 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sun, 9 Feb 2025 10:06:02 +0100 Subject: [PATCH] update prediction docs --- R-package/R/xgb.Booster.R | 4 +++- R-package/R/xgboost.R | 2 +- R-package/man/predict.xgb.Booster.Rd | 4 +++- R-package/man/predict.xgboost.Rd | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index f04a4918dd92..72fecac4cd34 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -94,12 +94,14 @@ xgb.get.handle <- function(object) { #' - Columns will be converted to numeric if they aren't already, which could potentially make #' the operation slower than in an equivalent `matrix` object. #' - The order of the columns must match with that of the data from which the model was fitted -#' (i.e. columns will not be referenced by their names, just by their order in the data). +#' (i.e. columns will not be referenced by their names, just by their order in the data), +#' unless passing `validate_features = TRUE` (which is not the default). #' - If the model was fitted to data with categorical columns, these columns must be of #' `factor` type here, and must use the same encoding (i.e. have the same levels). #' - If `newdata` contains any `factor` columns, they will be converted to base-0 #' encoding (same as during DMatrix creation) - hence, one should not pass a `factor` #' under a column which during training had a different type. +#' - Any columns with type other than `factor` will be interpreted as numeric. #' @param missing Float value that represents missing values in data #' (e.g., 0 or some other extreme value). #' diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index af63dc434c8e..581185605c51 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -1247,7 +1247,7 @@ xgboost <- function( #' #' In the case of data frames, if there are any categorical features, they should be of class #' `factor` and should have the same levels as the `factor` columns of the data from which the model -#' was constructed. +#' was constructed. Any columns with type other than `factor` will be interpreted as numeric. #' #' If there are named columns and the model was fitted to data with named columns, they will be #' matched by name by default (see `validate_features`). diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index ee0307bf3f47..2e5232055b96 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -40,12 +40,14 @@ If \code{newdata} is a \code{data.frame}, be aware that: \item Columns will be converted to numeric if they aren't already, which could potentially make the operation slower than in an equivalent \code{matrix} object. \item The order of the columns must match with that of the data from which the model was fitted -(i.e. columns will not be referenced by their names, just by their order in the data). +(i.e. columns will not be referenced by their names, just by their order in the data), +unless passing \code{validate_features = TRUE} (which is not the default). \item If the model was fitted to data with categorical columns, these columns must be of \code{factor} type here, and must use the same encoding (i.e. have the same levels). \item If \code{newdata} contains any \code{factor} columns, they will be converted to base-0 encoding (same as during DMatrix creation) - hence, one should not pass a \code{factor} under a column which during training had a different type. +\item Any columns with type other than \code{factor} will be interpreted as numeric. }} \item{missing}{Float value that represents missing values in data diff --git a/R-package/man/predict.xgboost.Rd b/R-package/man/predict.xgboost.Rd index 15e75965aaa6..7b212bb25a24 100644 --- a/R-package/man/predict.xgboost.Rd +++ b/R-package/man/predict.xgboost.Rd @@ -33,7 +33,7 @@ observation. In the case of data frames, if there are any categorical features, they should be of class \code{factor} and should have the same levels as the \code{factor} columns of the data from which the model -was constructed. +was constructed. Any columns with type other than \code{factor} will be interpreted as numeric. If there are named columns and the model was fitted to data with named columns, they will be matched by name by default (see \code{validate_features}).}