From d1fcef8224ff8bec2c39aa496bc0d91d32de9732 Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Sun, 9 Feb 2025 10:06:02 +0100
Subject: [PATCH] update prediction docs

---
 R-package/R/xgb.Booster.R            | 4 +++-
 R-package/R/xgboost.R                | 2 +-
 R-package/man/predict.xgb.Booster.Rd | 4 +++-
 R-package/man/predict.xgboost.Rd     | 2 +-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index f04a4918dd92..72fecac4cd34 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -94,12 +94,14 @@ xgb.get.handle <- function(object) {
 #'   - Columns will be converted to numeric if they aren't already, which could potentially make
 #'     the operation slower than in an equivalent `matrix` object.
 #'   - The order of the columns must match with that of the data from which the model was fitted
-#'     (i.e. columns will not be referenced by their names, just by their order in the data).
+#'     (i.e. columns will not be referenced by their names, just by their order in the data),
+#'     unless `validate_features = TRUE` is passed (which is not the default).
 #'   - If the model was fitted to data with categorical columns, these columns must be of
 #'     `factor` type here, and must use the same encoding (i.e. have the same levels).
 #'   - If `newdata` contains any `factor` columns, they will be converted to base-0
 #'     encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
 #'     under a column which during training had a different type.
+#'   - Any columns with type other than `factor` will be interpreted as numeric.
 #' @param missing Float value that represents missing values in data
 #'   (e.g., 0 or some other extreme value).
 #'
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index af63dc434c8e..581185605c51 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -1247,7 +1247,7 @@ xgboost <- function(
 #'
 #' In the case of data frames, if there are any categorical features, they should be of class
 #' `factor` and should have the same levels as the `factor` columns of the data from which the model
-#' was constructed.
+#' was constructed. Any columns with type other than `factor` will be interpreted as numeric.
 #'
 #' If there are named columns and the model was fitted to data with named columns, they will be
 #' matched by name by default (see `validate_features`).
diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd
index ee0307bf3f47..2e5232055b96 100644
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@@ -40,12 +40,14 @@ If \code{newdata} is a \code{data.frame}, be aware that:
 \item Columns will be converted to numeric if they aren't already, which could potentially make
 the operation slower than in an equivalent \code{matrix} object.
 \item The order of the columns must match with that of the data from which the model was fitted
-(i.e. columns will not be referenced by their names, just by their order in the data).
+(i.e. columns will not be referenced by their names, just by their order in the data),
+unless \code{validate_features = TRUE} is passed (which is not the default).
 \item If the model was fitted to data with categorical columns, these columns must be of
 \code{factor} type here, and must use the same encoding (i.e. have the same levels).
 \item If \code{newdata} contains any \code{factor} columns, they will be converted to base-0
 encoding (same as during DMatrix creation) - hence, one should not pass a \code{factor}
 under a column which during training had a different type.
+\item Any columns with type other than \code{factor} will be interpreted as numeric.
 }}
 
 \item{missing}{Float value that represents missing values in data
diff --git a/R-package/man/predict.xgboost.Rd b/R-package/man/predict.xgboost.Rd
index 15e75965aaa6..7b212bb25a24 100644
--- a/R-package/man/predict.xgboost.Rd
+++ b/R-package/man/predict.xgboost.Rd
@@ -33,7 +33,7 @@ observation.
 
 In the case of data frames, if there are any categorical features, they should be of class
 \code{factor} and should have the same levels as the \code{factor} columns of the data from which the model
-was constructed.
+was constructed. Any columns with type other than \code{factor} will be interpreted as numeric.
 
 If there are named columns and the model was fitted to data with named columns, they will be
 matched by name by default (see \code{validate_features}).}