diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 02e886bbcbac..8c7c8ffd7d68 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -4,6 +4,10 @@ S3method("dimnames<-",lgb.Dataset) S3method(dim,lgb.Dataset) S3method(dimnames,lgb.Dataset) S3method(get_field,lgb.Dataset) +S3method(lightgbm,data.frame) +S3method(lightgbm,dgCMatrix) +S3method(lightgbm,formula) +S3method(lightgbm,matrix) S3method(predict,lgb.Booster) S3method(print,lgb.Booster) S3method(set_field,lgb.Dataset) @@ -38,11 +42,16 @@ export(saveRDS.lgb.Booster) export(set_field) export(slice) import(methods) +importClassesFrom(Matrix,CsparseMatrix) +importClassesFrom(Matrix,dgCMatrix) +importClassesFrom(Matrix,sparseMatrix) +importClassesFrom(Matrix,sparseVector) importFrom(Matrix,Matrix) importFrom(R6,R6Class) importFrom(data.table,":=") importFrom(data.table,as.data.table) importFrom(data.table,data.table) +importFrom(data.table,is.data.table) importFrom(data.table,rbindlist) importFrom(data.table,set) importFrom(data.table,setnames) @@ -51,8 +60,11 @@ importFrom(data.table,setorderv) importFrom(graphics,barplot) importFrom(graphics,par) importFrom(jsonlite,fromJSON) +importFrom(methods,as) importFrom(methods,is) +importFrom(parallel,detectCores) importFrom(stats,quantile) +importFrom(utils,head) importFrom(utils,modifyList) importFrom(utils,read.delim) useDynLib(lib_lightgbm , .registration = TRUE) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 311d3f2b910c..a0eddd9d9d65 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -9,6 +9,7 @@ Booster <- R6::R6Class( best_score = NA_real_, params = list(), record_evals = list(), + data_processor = NULL, # Finalize will free up the handles finalize = function() { @@ -497,6 +498,10 @@ Booster <- R6::R6Class( self$restore_handle() + if (!is.null(self$data_processor)) { + data <- self$data_processor$process_new_data(data) + } + if (is.null(num_iteration)) { num_iteration <- self$best_iter } @@ -510,19 +515,20 @@ Booster <- R6::R6Class( modelfile = private$handle , params = params ) - return( - predictor$predict( - data = data - , start_iteration = start_iteration - , num_iteration = num_iteration - , rawscore = rawscore - , predleaf = predleaf - , predcontrib = predcontrib - , header = header - , reshape = reshape - ) + pred <- predictor$predict( + data = data + , start_iteration = start_iteration + , num_iteration = num_iteration + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , header = header + , reshape = reshape ) - + if (!predleaf && !is.null(self$data_processor)) { + pred <- self$data_processor$process_predictions(pred, predcontrib) + } + return(pred) }, # Transform into predictor @@ -729,10 +735,60 @@ Booster <- R6::R6Class( #' @name predict.lgb.Booster #' @title Predict method for LightGBM model -#' @description Predicted values based on class \code{lgb.Booster} -#' @param object Object of class \code{lgb.Booster} -#' @param data a \code{matrix} object, a \code{dgCMatrix} object or -#' a character representing a path to a text file (CSV, TSV, or LibSVM) +#' @description Predict values on new data based on a boosting model (class \code{lgb.Booster}). +#' @param object Object of class \code{lgb.Booster} from which to make predictions. +#' @param newdata New data on which to make predictions. Allowed types are:\itemize{ +#' \item `data.frame`, \bold{only if} the model object was produced through the \link{lightgbm} +#' interface. 
If the input to \link{lightgbm} was a `formula` or a `data.frame` with +#' categorical columns (`factor` or `character`), then \bold{only} `data.frame` inputs will +#' be accepted here. Columns will be taken according to the names that they had in the data +#' that they were passed to the model (i.e. the input here will be reordered if the order +#' does not match, and will be subsetted if it has additional columns). +#' \item `matrix` from base R. Will be converted to numeric if it isn't already. +#' \item `dgCMatrix` from package `Matrix`. +#' \item `character` with a single entry representing a path to a text file in CSV, TSV, +#' or SVMLight / LibSVM formats. +#' } +#' Other input types are not allowed. +#' +#' Note that, if using the `formula` interface, the user is responsible for making +#' factor variables' levels match to those that were passed in the data to which the model +#' was fitted, and if the model was not produced through the \link{lightgbm} interface +#' (e.g. through \link{lgb.train} or \link{lgb.cv}), then the user is responsible for +#' handling the encoding of categorical variables. +#' @param type Type of prediction to output. Allowed types are:\itemize{ +#' \item `"score"`, which will output the predicted score according to the function +#' objective function being optimized (equivalent to `"link"` in base R's `glm`) - for +#' example, for `objective="binary"`, it will output probabilities, while for +#' `objective="regression"`, it will output predicted values. For objective functions other +#' than multi-class classification, the result will be a numeric vector with number of rows +#' matching to `nrow(newdata)`. For multi-class classification, if passing `reshape=TRUE`, +#' it will output a matrix with columns matching to the number of classes (and if the model +#' object was produced through the \link{lightgbm} interface instead of through +#' \link{lgb.train} or \link{lgb.cv}, it will have class names as column names if available), +#' and if passing `reshape=FALSE`, will output a numeric vector with these same results in +#' row-major order. +#' \item `"class"` (only for binary and multi-class classification objectives), which will +#' output the class with the highest predicted score. If the model object was produced through +#' the \link{lightgbm} interface and the label was a factor variable, the result will be a +#' factor variable with levels matching to classes, otherwise it will be an integer vector +#' with indicating the class number. +#' \item `"raw"`, which will output the non-transformed numbers (sum of predictions from +#' boosting iterations' results) from which the score is produced for a given objective +#' function - for example, for `objective="binary"`, this corresponds to log-odds. The +#' output type is the same as for `type="score"`. +#' \item `"leaf"`, which will output the index of the terminal node / leaf at which +#' each observations falls in each tree in the model, outputted as as integers. If passing +#' `reshape=TRUE`, the result will be a matrix with number of columns matching to number of +#' trees, otherwise it will be a vector with this same matrix in row-major order. +#' \item `"contrib"`, which will return the per-feature contributions for each prediction. +#' If passing `reshape=TRUE`, the result will be a matrix with number of columns matching +#' to number of features that the model saw while fitting, otherwise will be a vector with +#' this same matrix outputted in row-major order. 
If the model object was produced through +#' the \link{lightgbm} interface, `reshape=TRUE` is passed, and the data to which the model +#' was fit had column names, then the output matrix will have column names corresponding to +#' the feature names. +#' } #' @param start_iteration int or None, optional (default=None) #' Start index of the iteration to predict. #' If None or <= 0, starts from the first iteration. @@ -741,26 +797,25 @@ Booster <- R6::R6Class( #' If None, if the best iteration exists and start_iteration is None or <= 0, the #' best iteration is used; otherwise, all iterations from start_iteration are used. #' If <= 0, all iterations from start_iteration are used (no limits). -#' @param rawscore whether the prediction should be returned in the for of original untransformed -#' sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} -#' for logistic regression would result in predictions for log-odds instead of probabilities. -#' @param predleaf whether predict leaf index instead. -#' @param predcontrib return per-feature contributions for each record. #' @param header only used for prediction for text file. True if text file has header #' @param reshape whether to reshape the vector of predictions to a matrix form when there are several -#' prediction outputs per case. +#' prediction outputs per case. When using `reshape=FALSE`, the output will +#' be in row-major order (contrary to R matrices which assume column-major order). +#' If passing `reshape=TRUE` and `newdata` has row names, the output will also have those +#' row names. +#' @param index1 When producing outputs that correspond to some numeration (such as +#' `type="class"` or `type="leaf"`), whether to make these outputs have a numeration +#' starting at 1 or at zero. Note that the underlying lightgbm core library uses zero-based +#' numeration, thus `index1=FALSE` will be slightly faster. #' @param params a list of additional named parameters. See #' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ #' the "Predict Parameters" section of the documentation} for a list of parameters and #' valid values. -#' @param ... ignored -#' @return For regression or binary classification, it returns a vector of length \code{nrows(data)}. -#' For multiclass classification, either a \code{num_class * nrows(data)} vector or -#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on -#' the \code{reshape} value. -#' -#' When \code{predleaf = TRUE}, the output is a matrix object with the -#' number of columns corresponding to the number of trees. +#' @param ... Ignored. +#' @return Either a matrix with number of rows matching to the number of rows in `newdata`, or +#' a vector with number of entries matching to rows in `newdata`, or a vector representing a +#' matrix in row-major order with number of entries matching to `nrow(newdata)*n_outputs`; +#' depending on the requested `type` and `reshape` parameter. #' #' @examples #' \donttest{ @@ -797,14 +852,13 @@ Booster <- R6::R6Class( #' @importFrom utils modifyList #' @export predict.lgb.Booster <- function(object, - data, + newdata, + type = c("score", "class", "raw", "leaf", "contrib"), start_iteration = NULL, num_iteration = NULL, - rawscore = FALSE, - predleaf = FALSE, - predcontrib = FALSE, header = FALSE, - reshape = FALSE, + reshape = TRUE, + index1 = TRUE, params = list(), ...) 
{ @@ -812,6 +866,30 @@ predict.lgb.Booster <- function(object, stop("predict.lgb.Booster: object should be an ", sQuote("lgb.Booster")) } + if (!is.character(type)) { + stop("'type' must be a character variable.") + } + type <- type[1L] + allowed_type <- c("score", "class", "raw", "leaf", "contrib") + if (!(type %in% allowed_type)) { + stop(sprintf("'type' must be one of the following: %s" + , paste(allowed_type, collapse = ", "))) + } + if (type == "class") { + reshape <- TRUE + } + rawscore <- type == "raw" + predleaf <- type == "leaf" + predcontrib <- type == "contrib" + if (type == "class") { + classif_objectives <- c("binary", "multiclass", "multiclassova") + if (!(object$params$objective %in% classif_objectives)) { + stop(sprintf(paste0("Passed prediction 'type=class', but model is not a classifier" + , "(objective: %s).") + , object$params$objective)) + } + } + additional_params <- list(...) if (length(additional_params) > 0L) { warning(paste0( @@ -821,19 +899,51 @@ predict.lgb.Booster <- function(object, )) } - return( - object$predict( - data = data - , start_iteration = start_iteration - , num_iteration = num_iteration - , rawscore = rawscore - , predleaf = predleaf - , predcontrib = predcontrib - , header = header - , reshape = reshape - , params = params - ) + pred <- object$predict( + data = newdata + , start_iteration = start_iteration + , num_iteration = num_iteration + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , header = header + , reshape = reshape + , params = params ) + if (type == "class") { + if (object$params$objective == "binary") { + pred <- as.integer(pred >= 0.5) + if (NROW(object$data_processor$label_levels)) { + pred <- pred + 1L + attributes(pred)$levels <- object$data_processor$label_levels + attributes(pred)$class <- "factor" + } else if (index1) { + pred <- pred + 1L + } + } else { + cnames <- colnames(pred) + pred <- max.col(pred) + if (NROW(cnames)) { + if (!is.integer(pred)) { + pred <- as.integer(pred) + } + attributes(pred)$levels <- cnames + attributes(pred)$class <- "factor" + } else if (!index1) { + pred <- pred - 1L + } + } + } else if (type == "leaf" && index1) { + pred <- pred + 1L + } + if (reshape && NROW(row.names(newdata))) { + if (is.null(dim(pred))) { + names(pred) <- row.names(newdata) + } else { + row.names(pred) <- row.names(newdata) + } + } + return(pred) } #' @name print.lgb.Booster diff --git a/R-package/R/lgb.DataProcessor.R b/R-package/R/lgb.DataProcessor.R new file mode 100644 index 000000000000..c468f3352230 --- /dev/null +++ b/R-package/R/lgb.DataProcessor.R @@ -0,0 +1,304 @@ +#' @importFrom data.table is.data.table +#' @importFrom methods as +#' @importClassesFrom Matrix sparseVector sparseMatrix CsparseMatrix dgCMatrix + +DataProcessor <- R6::R6Class( + classname = "lgb.DataProcessor", + public = list( + ncols = NULL, + colnames = NULL, + factor_levels = NULL, + formula = NULL, + formula_terms = NULL, + formula_predict = NULL, + label_levels = NULL, + initialize = function(env_out, + data, + params, + model_formula = NULL, + label = NULL, + weights = NULL, + init_score = NULL) { + + if (!is.null(model_formula)) { + + if (!is.data.frame(data)) { + stop("'lightgbm()' formula interface is only supported for 'data.frame' inputs.") + } + self$formula <- model_formula + formula_terms <- as.character(model_formula) + formula_terms[3L] <- paste0(formula_terms[3L], "-1") + model_formula <- paste0(formula_terms[2L], formula_terms[1L], formula_terms[3L]) + model_formula <- as.formula(model_formula) + 
self$formula_terms <- terms(model_formula, data = data) + self$formula_predict <- delete.response(self$formula_terms) + model_frame <- model.frame(model_formula, data, na.action = NULL) + label <- model.response(model_frame, type = "any") + data <- model.matrix(self$formula_predict, data = model_frame) + + } else { + + self$colnames <- colnames(data) + + if (NROW(self$colnames)) { + + # A replacement of 'deparse1' which was added in R 4.0.0, + # added for compatibility with older R versions + deparse1_ <- function(x) paste(deparse(x, width.cutoff = 500L), collapse = "") + + label_nse <- substitute(label) + label_nse <- eval.parent(substitute(substitute(label_nse)), n = 2L) + label_nse <- deparse1_(label_nse) + if (label_nse != "NULL" && label_nse %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != label_nse] + if (data.table::is.data.table(data)) { + label <- data[, label_nse, with = FALSE, drop = TRUE] + } else { + label <- data[, label_nse, drop = TRUE] + } + } else if (is.character(label) && NROW(label) == 1L && label %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != label] + if (data.table::is.data.table(data)) { + label <- data[, label, with = FALSE, drop = TRUE] + } else { + label <- data[, label, drop = TRUE] + } + } + + weights_nse <- substitute(weights) + weights_nse <- eval.parent(substitute(substitute(weights_nse)), n = 2L) + weights_nse <- deparse1_(weights_nse) + if (weights_nse != "NULL" && weights_nse %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != weights_nse] + if (data.table::is.data.table(data)) { + weights <- data[, weights_nse, with = FALSE, drop = TRUE] + } else { + weights <- data[, weights_nse, drop = TRUE] + } + } else if (is.character(weights) && NROW(weights) == 1L && weights %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != weights] + if (data.table::is.data.table(data)) { + weights <- data[, weights, with = FALSE, drop = TRUE] + } else { + weights <- data[, weights, drop = TRUE] + } + } + + init_score_nse <- substitute(init_score) + init_score_nse <- eval.parent(substitute(substitute(init_score_nse)), n = 2L) + init_score_nse <- deparse1_(init_score_nse) + if (init_score_nse != "NULL" && init_score_nse %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != init_score_nse] + if (data.table::is.data.table(data)) { + init_score <- data[, init_score_nse, with = FALSE, drop = TRUE] + } else { + init_score <- data[, init_score_nse, drop = TRUE] + } + } else if (is.character(init_score) && NROW(init_score) == 1L && init_score %in% self$colnames) { + self$colnames <- self$colnames[self$colnames != init_score] + if (data.table::is.data.table(data)) { + init_score <- data[, init_score, with = FALSE, drop = TRUE] + } else { + init_score <- data[, init_score, drop = TRUE] + } + } + + if (length(self$colnames) < ncol(data)) { + if (data.table::is.data.table(data)) { + data <- data[, self$colnames, with = FALSE, drop = FALSE] + } else { + data <- data[, self$colnames, drop = FALSE] + } + } + } else { + self$colnames <- NULL + } + + self$ncols <- ncol(data) + + if (is.data.frame(data)) { + + supported_types <- c("numeric", "integer", "factor", "character", "Date", "POSIXct") + coltype_is_supported <- sapply(data, function(col) inherits(col, supported_types)) + if (!all(coltype_is_supported)) { + unsupported_types <- unique(unlist(lapply( + data + , function(col) if (inherits(col, supported_types)) NULL else type(col) + ))) + stop(sprintf("Error: 'lightgbm()' received 
'data' with unsupported column types: %s" + , paste(head(unsupported_types, 5L)), collapse = ", ")) + } + + data <- data.table::as.data.table(data) + + cols_char <- names(data)[sapply(data, is.character)] + if (NROW(cols_char)) { + suppressWarnings(data[, (cols_char) := lapply(.SD, factor), .SDcols = cols_char]) + } + + cols_factors <- names(data)[sapply(data, is.factor)] + if (NROW(cols_factors)) { + has_ordered_factor <- any(sapply(data, is.ordered)) + if (has_ordered_factor) { + warning(paste0("'lighgbm()' was passed data with ordered factors." + , "The order in factor levels is ignored.")) + } + self$factor_levels <- lapply(data[, cols_factors, with = FALSE, drop = FALSE], levels) + data[ + , (cols_factors) := lapply(.SD, function(x) { + x <- as.numeric(x) - 1.0 + x[is.na(x)] <- -1.0 + return(x) + }) + , .SDcols = cols_factors + ] + + params$categorical_feature <- which(names(data) %in% cols_factors) + } else { + params$categorical_feature <- NULL + } + + data <- as.matrix(data, drop = FALSE) + } + } + + if (is.character(label)) { + label <- factor(label) + } + if (!is.factor(label)) { + label <- as.numeric(label) + env_out$objective <- "regression" + } else { + self$label_levels <- levels(label) + if (length(levels(label)) <= 1L) { + stop("Labels to predict is a factor with <2 possible values.") + } else if (length(levels(label)) == 2L) { + env_out$objective <- "binary" + } else { + env_out$objective <- "multiclass" + } + label <- as.numeric(label) - 1.0 + } + + if (!is.numeric(label)) { + label <- as.numeric(label) + } + if (length(label) != nrow(data)) { + stop("Labels to predict must have length equal to the number of rows in 'X'/'data'.") + } + + if (!is.null(weights)) { + weights <- as.numeric(weights) + if (length(weights) != nrow(data)) { + stop("'weights' must have length equal to the number of rows in 'X'/'data'.") + } + } + if (!is.null(init_score)) { + init_score <- as.numeric(init_score) + if (length(weights) != nrow(data)) { + stop("'init_score' must have length equal to the number of rows in 'X'/'data'.") + } + } + + dataset <- lgb.Dataset( + data = data + , label = label + , weight = weights + , init_score = init_score + , params = params + ) + env_out$dataset <- dataset + }, + + process_new_data = function(data) { + if (!is.null(self$formula_predict)) { + + data <- model.matrix(self$formula_predict, data = data) + + } else { + + if (is.null(dim(data))) { + if (inherits(data, "sparseVector")) { + data <- t(as(data, "CsparseMatrix")) + if (!inherits(data, "dgCMatrix")) { + data <- as(data, "dgCMatrix") + } + } else { + data <- matrix(data, nrow = 1L) + } + } + + if (ncol(data) < self$ncols) { + stop(sprintf("New data has fewer columns than expected (%d vs %d)" + , ncol(data), self$ncols)) + } + + if (NROW(self$colnames)) { + if (data.table::is.data.table(data)) { + data <- data[, self$colnames, with = FALSE, drop = FALSE] + } else { + data <- data[, self$colnames, drop = FALSE] + } + } else { + if (ncol(data) > self$ncols) { + if (data.table::is.data.table(data)) { + data <- data[, 1L:self$ncols, with = FALSE, drop = FALSE] + } else { + data <- data[, 1L:self$ncols, drop = FALSE] + } + } + } + + if (NROW(self$factor_levels)) { + if (!is.data.frame(data)) { + stop(paste0("When calling 'lightgbm()' on a 'data.frame' with factor columns," + , "new data to predict on must also be passed as 'data.frame'.")) + } + data <- as.data.table(data) + cols_cat <- names(self$factor_levels) + data[ + , (cols_cat) := mapply( + factor + , .SD + , self$factor_levels + , SIMPLIFY = FALSE + 
) + , .SDcols = cols_cat + ][ + , (cols_cat) := lapply(.SD, function(x) { + x <- as.numeric(x) - 1.0 + x[is.na(x)] <- -1.0 + return(x) + }) + , .SDcols = cols_cat + ] + } + } + + if (is.data.frame(data)) { + data <- as.matrix(data, drop = FALSE) + } + + return(data) + }, + + process_predictions = function(pred, is_contrib = FALSE) { + if (!is_contrib && NROW(self$label_levels)) { + if (is.matrix(pred) && ncol(pred) == length(self$label_levels)) { + colnames(pred) <- self$label_levels + } + } + if (is_contrib) { + if (NROW(self$colnames) && ncol(pred) == NROW(self$colnames) + 1L) { + colnames(pred) <- c(self$colnames, "(Intercept)") + } else if (!is.null(self$formula_terms)) { + term_labels <- attributes(self$formula_terms)$term.labels + if (length(term_labels) + 1L == ncol(pred)) { + colnames(pred) <- c(term_labels, "(Intercept)") + } + } + } + return(pred) + } + ) +) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index be3036a52986..ca36749b1187 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -15,8 +15,9 @@ #' model <- lightgbm( #' agaricus.train$data #' , agaricus.train$label -#' , params = list(objective = "binary", nthreads = 1L) +#' , objective = "binary" #' , nrounds = 5L +#' , nthreads = 1L #' , save_name = NULL #' , verbose = 0) #' fname <- tempfile(fileext="rds") diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index b40c8cc21c04..e0944075e4d8 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -86,68 +86,376 @@ #' @keywords internal NULL -#' @name lightgbm +#' @rdname lightgbm #' @title Train a LightGBM model -#' @description Simple interface for training a LightGBM model. +#' @description Simplified interface for training / fitting a LightGBM model which follows typical +#' R idioms for model fitting and predictions. Note that this interface does not +#' expose the full spectrum of library features as \link{lgb.train} does. +#' @details This is a thin wrapper over \link{lgb.Dataset} and then \link{lgb.train} which performs +#' extra steps such as automatically detecting categorical variables and handling their +#' encoding. It is intended as an easy-to-use interface that follows common R idioms for +#' predictive models. +#' +#' It uses base R's functions for processing the data, such as `factor`, which are not +#' particularly efficient - for serious usage, it is recommended to use the \link{lgb.train} +#' interface with \link{lgb.Dataset} instead, handling aspects such as encoding of categorical +#' variables externally through your favorite tools. +#' +#' \bold{Important:} using the `formula` interface relies on R's own formula handling, which +#' might be very slow for large inputs and will dummy-encode all categorical variables +#' (meaning: they will not be treated as categorical in tree splits, rather each level will be +#' treated as a separate variable, without exploiting the sparsity and independence patterns +#' in the encoded data). +#' +#' When models are produced through this interface (as opposed to \link{lgb.train}), the +#' method \link{predict.lgb.Booster} will additionally gain new behaviors, such as taking +#' columns by name from the new input data or adding names to the resulting predicted matrices +#' (based on the classes or features depending on what is being predicted). #' @inheritParams lgb_shared_params -#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} -#' @param weight vector of response values. 
If not NULL, will set to dataset +#' @param formula A formula for specifying the response/label and predictors/features in the +#' model to be fitted. This is provided for ease of use, but using the `formula` interface +#' is discouraged for a couple reasons (see details section for mode details):\itemize{ +#' \item It converts all factor variables to dummy encoding, which typically does not lead to +#' models as good as those in which categorical variables are treated as such. +#' \item It uses base R's formula handling for inputs, which can be particularly +#' computationally inefficient compared to the alternatives. +#' \item If the number of variables is large, it can increase model size quite a bit. +#' } +#' +#' If using the `formula` interface, then `data` must be a `data.frame`. +#' @param data A `data.frame`. In the non-formula interface, it will use all available variables +#' (those not specified as being `label`, `weight`, or `init_score`) as features / predictors, +#' and will assume their types are:\itemize{ +#' \item Numeric, if they are of type `numeric`, `integer`, `Date`, `POSIXct`. +#' \item Categorical, if they are of type `factor`, `character`. +#' } +#' +#' Other variable types are not accepted. Note that the underlying core library only accepts +#' `numeric` inputs, thus other types will end up being casted. +#' +#' Note that, if using the `data.frame` interface, it is not possible to manually specify +#' categorical variables through `params` - instead, these will be deduced from the data types, +#' and their encoding will be handled internally in the fitting and prediction functions. +#' Under the `data.frame` interface, if the data contains any categorical variables, then at +#' prediction time only `data.frame` inputs will be allowed. +#' @param X Data features / covariates / predictors with which the model will try to predict `y`. +#' +#' Note that, if using non-standard evaluation for `y`, `weights`, or `init_score` (specifying +#' them as column names from `X`), then `X` will be subsetted, and any additional parameters +#' passed that correspond to column indices (such as per-column `max_bin` or +#' `categorical_features`) will be applied on the subsetted data. +#' +#' Supports dense matrices from base R (class `matrix`, will be casted to `double` storage +#' mode if it isn't already) and sparse matrices in CSC format from the `Matrix` package +#' (class `dgCMatrix`). +#' @param y,label Target / response variable to predict. May be passed as:\itemize{ +#' \item The name of a column from `X` / `data`, if it has column names. Will use non-standard +#' evaluation in order to try to determine if it matches with the name of a column in +#' `X` / `data` (i.e. will accept it as the name of a column without putting quotes +#' around it), and can also be passed as a character. +#' \item A vector with the number of entries matching to the number of rows in `X` / `data`. +#' } +#' If passing `objective="auto"`, the optimization objective will be determined according to +#' the type / class of this variable. +#' +#' If `y` is passed as a factor, then `num_class` in `params` will be set automatically +#' according to its levels. +#' +#' Passing `y` as a factor will also make \link{predict.lgb.Booster} use its levels in the +#' outputs from predictions when appropriate. +#' @param weights Sample / observation weights for rows in `X` / `data`. Same format as +#' `y` (i.e. accepts non-standard evaluation for column names, and accepts numeric vectors). 
+#' @param init_score Initial values for each observation from which the boosting process will +#' be started (e.g. as the result of some previous model). If not passing it (the default), +#' will start from a blank state. +#' @param objective Optimization objective (e.g. `"regression"`, `"binary"`, etc.). +#' For a list of accepted objectives, see +#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ +#' the "Parameters" section of the documentation}. +#' +#' If passing `"auto"`, will be deduced from the type of `y` / `label`:\itemize{ +#' \item If `y` is not a factor, will set the objective to `"regression"`. +#' \item If `y` is a factor with two classes, will set the objective to `"binary"`. +#' \item If `y` is a factor with more than two classes, will set the objective to `"multiclass"`. +#' } +#' +#' If `y` is a factor, then it will automatically set parameter `num_classes` based on +#' its number of levels, overriding any such entry in `params` if it is present there. +#' @param nthreads Number of parallel threads to use. For best speed, this should be set to the number of +#' physical cores in the CPU - in a typical x86-64 machine, this corresponds to half the +#' number of maximum threads (e.g. `nthreads = max(parallel::detectCores() / 2L, 1L)` as +#' a shorthand for the optimal value). +#' +#' Be aware that using too many threads can result in speed degradation in smaller datasets +#' (see the parameters documentation for more details). +#' +#' If passing zero, will use the default number of threads configured for OpenMP. +#' +#' This parameter overrides `num_threads` in `params` if it exists there. +#' @param dataset_params Extra parameters to pass to \link{lgb.Dataset} once it comes the +#' time to convert the dataset to this library's internal format. +#' +#' For a list of the accepted parameters, see +#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#io-parameters}{ +#' the "I/O Parameters" section of the documentation}. #' @param save_name File name to use when writing the trained model to disk. Should end in ".model". #' If passing `NULL`, will not save the trained model to disk. -#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example +#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example: #' \itemize{ #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} -#' \item{\code{obj}: objective function, can be character or custom objective function. Examples include -#' \code{regression}, \code{regression_l1}, \code{huber}, -#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} #' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} #' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} #' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset} #' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature -#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to -#' say "the first and tenth columns").} +#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to +#' say "the first and tenth columns"). 
This parameter is not supported in the `formula` and +#' `data.frame` interfaces.} #' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model #' into a predictor model which frees up memory and the original datasets} #' } #' @inheritSection lgb_shared_params Early Stopping -#' @return a trained \code{lgb.Booster} +#' @return A trained \code{lgb.Booster} model object. +#' @importFrom utils head +#' @importFrom parallel detectCores +#' @examples +#' library(lightgbm) +#' data("iris") +#' model <- lightgbm(Species ~ ., data = iris, verbose = -1L, nthreads = 1L) +#' pred <- predict(model, iris, type = "class") +#' all(pred == iris$Species) +#' +#' model <- lightgbm(iris, Species, verbose = -1L, nthreads = 1L) +#' head(predict(model, iris, type = "score")) +#' +#' model <- lightgbm(as.matrix(iris[, -5L]), iris$Species, verbose = -1L, nthreads = 1L) +#' head(predict(model, iris, type = "raw")) +#' @export +lightgbm <- function(...) { + UseMethod("lightgbm") +} + +#' @rdname lightgbm #' @export -lightgbm <- function(data, - label = NULL, - weight = NULL, - params = list(), - nrounds = 100L, - verbose = 1L, - eval_freq = 1L, - early_stopping_rounds = NULL, - save_name = "lightgbm.model", - init_model = NULL, - callbacks = list(), - serializable = TRUE, - ...) { +lightgbm.formula <- function(formula, + data, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ... + ) { + data_processor_outputs <- new.env() + data_processor <- DataProcessor$new( + data_processor_outputs + , data + , dataset_params + , model_formula = formula + , label = NULL + , weights = weights + , init_score = init_score + ) + return( + lightgbm_internal( + data_processor_outputs = data_processor_outputs + , data_processor = data_processor + , objective = objective + , nthreads = nthreads + , params = params + , nrounds = nrounds + , verbose = verbose + , eval_freq = eval_freq + , early_stopping_rounds = early_stopping_rounds + , save_name = save_name + , serializable = serializable + , ... + ) + ) +} + +#' @rdname lightgbm +#' @export +lightgbm.data.frame <- function(data, + label, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ...) { + if (!is.null(params$categorical_feature) || !is.null(dataset_params$categorical_feature)) { + stop("'categorical_feature' is not supported for 'data.frame' inputs in 'lightgbm()'.") + } + data_processor_outputs <- new.env() + data_processor <- DataProcessor$new( + data_processor_outputs + , as.data.frame(data) + , dataset_params + , model_formula = NULL + , label = label + , weights = weights + , init_score = init_score + ) + return( + lightgbm_internal( + data_processor_outputs = data_processor_outputs + , data_processor = data_processor + , objective = objective + , nthreads = nthreads + , params = params + , nrounds = nrounds + , verbose = verbose + , eval_freq = eval_freq + , early_stopping_rounds = early_stopping_rounds + , save_name = save_name + , serializable = serializable + , ... 
+ ) + ) +} + +#' @rdname lightgbm +#' @export +lightgbm.matrix <- function(X, + y, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ...) { + data_processor_outputs <- new.env() + data_processor <- DataProcessor$new( + data_processor_outputs + , X + , dataset_params + , model_formula = NULL + , label = y + , weights = weights + , init_score = init_score + ) + return( + lightgbm_internal( + data_processor_outputs = data_processor_outputs + , data_processor = data_processor + , objective = objective + , nthreads = nthreads + , params = params + , nrounds = nrounds + , verbose = verbose + , eval_freq = eval_freq + , early_stopping_rounds = early_stopping_rounds + , save_name = save_name + , serializable = serializable + , ... + ) + ) +} + +#' @rdname lightgbm +#' @export +lightgbm.dgCMatrix <- function(X, + y, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ...) { + data_processor_outputs <- new.env() + data_processor <- DataProcessor$new( + data_processor_outputs + , X + , dataset_params + , model_formula = NULL + , label = y + , weights = weights + , init_score = init_score + ) + return( + lightgbm_internal( + data_processor_outputs = data_processor_outputs + , data_processor = data_processor + , objective = objective + , nthreads = nthreads + , params = params + , nrounds = nrounds + , verbose = verbose + , eval_freq = eval_freq + , early_stopping_rounds = early_stopping_rounds + , save_name = save_name + , serializable = serializable + , ... + ) + ) +} + +lightgbm_internal <- function(data_processor_outputs, + data_processor, + objective, + nthreads, + params = list(), + nrounds = 100L, + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = "lightgbm.model", + serializable = TRUE, + ...) 
{ + if (objective == "auto") { + objective <- data_processor_outputs$objective + } + if (objective %in% c("multiclass", "multiclassova") && NROW(data_processor$label_levels)) { + if (!is.null(params$num_class)) { + warning("'num_class' is overriden when using 'lightgbm()' interface with factors.") + } + params$num_class <- length(data_processor$label_levels) + } + params$num_threads <- nthreads # validate inputs early to avoid unnecessary computation if (nrounds <= 0L) { stop("nrounds should be greater than zero") } - # Set data to a temporary variable - dtrain <- data - - # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually - if (!lgb.is.Dataset(x = dtrain)) { - dtrain <- lgb.Dataset(data = data, label = label, weight = weight) - } + dtrain <- data_processor_outputs$dataset train_args <- list( "params" = params , "data" = dtrain , "nrounds" = nrounds + , "obj" = objective , "verbose" = verbose , "eval_freq" = eval_freq , "early_stopping_rounds" = early_stopping_rounds - , "init_model" = init_model - , "callbacks" = callbacks , "serializable" = serializable ) train_args <- append(train_args, list(...)) @@ -166,6 +474,7 @@ lightgbm <- function(data, what = lgb.train , args = train_args ) + bst$data_processor <- data_processor # Store model under a specific name if (!is.null(save_name)) { diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index 199614241502..0c3504301907 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -25,8 +25,9 @@ data("agaricus.train") model <- lightgbm( agaricus.train$data , agaricus.train$label - , params = list(objective = "binary", nthreads = 1L) + , objective = "binary" , nrounds = 5L + , nthreads = 1L , save_name = NULL , verbose = 0) fname <- tempfile(fileext="rds") diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 1e6be676f62e..61579474d99b 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -2,37 +2,173 @@ % Please edit documentation in R/lightgbm.R \name{lightgbm} \alias{lightgbm} +\alias{lightgbm.formula} +\alias{lightgbm.data.frame} +\alias{lightgbm.matrix} +\alias{lightgbm.dgCMatrix} \title{Train a LightGBM model} \usage{ -lightgbm( +lightgbm(...) + +\method{lightgbm}{formula}( + formula, + data, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ... +) + +\method{lightgbm}{data.frame}( data, - label = NULL, - weight = NULL, + label, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, + serializable = TRUE, + ... +) + +\method{lightgbm}{matrix}( + X, + y, + weights = NULL, + init_score = NULL, + objective = "auto", nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), verbose = 1L, eval_freq = 1L, early_stopping_rounds = NULL, - save_name = "lightgbm.model", - init_model = NULL, - callbacks = list(), + save_name = NULL, + serializable = TRUE, + ... 
+) + +\method{lightgbm}{dgCMatrix}( + X, + y, + weights = NULL, + init_score = NULL, + objective = "auto", + nrounds = 100L, + nthreads = parallel::detectCores(), + params = list(), + dataset_params = list(), + verbose = 1L, + eval_freq = 1L, + early_stopping_rounds = NULL, + save_name = NULL, serializable = TRUE, ... ) } \arguments{ -\item{data}{a \code{lgb.Dataset} object, used for training. Some functions, such as \code{\link{lgb.cv}}, -may allow you to pass other types of data like \code{matrix} and then separately supply -\code{label} as a keyword argument.} +\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example: +\itemize{ + \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} + \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} + \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} + \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset} + \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature + names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to + say "the first and tenth columns"). This parameter is not supported in the `formula` and + `data.frame` interfaces.} + \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model + into a predictor model which frees up memory and the original datasets} +}} + +\item{formula}{A formula for specifying the response/label and predictors/features in the + model to be fitted. This is provided for ease of use, but using the `formula` interface + is discouraged for a couple reasons (see details section for mode details):\itemize{ + \item It converts all factor variables to dummy encoding, which typically does not lead to + models as good as those in which categorical variables are treated as such. + \item It uses base R's formula handling for inputs, which can be particularly + computationally inefficient compared to the alternatives. + \item If the number of variables is large, it can increase model size quite a bit. + } -\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}} + If using the `formula` interface, then `data` must be a `data.frame`.} -\item{weight}{vector of response values. If not NULL, will set to dataset} +\item{data}{A `data.frame`. In the non-formula interface, it will use all available variables + (those not specified as being `label`, `weight`, or `init_score`) as features / predictors, + and will assume their types are:\itemize{ + \item Numeric, if they are of type `numeric`, `integer`, `Date`, `POSIXct`. + \item Categorical, if they are of type `factor`, `character`. + } + + Other variable types are not accepted. Note that the underlying core library only accepts + `numeric` inputs, thus other types will end up being casted. + + Note that, if using the `data.frame` interface, it is not possible to manually specify + categorical variables through `params` - instead, these will be deduced from the data types, + and their encoding will be handled internally in the fitting and prediction functions. + Under the `data.frame` interface, if the data contains any categorical variables, then at + prediction time only `data.frame` inputs will be allowed.} + +\item{weights}{Sample / observation weights for rows in `X` / `data`. Same format as +`y` (i.e. 
accepts non-standard evaluation for column names, and accepts numeric vectors).} + +\item{init_score}{Initial values for each observation from which the boosting process will +be started (e.g. as the result of some previous model). If not passing it (the default), +will start from a blank state.} + +\item{objective}{Optimization objective (e.g. `"regression"`, `"binary"`, etc.). + For a list of accepted objectives, see + \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ + the "Parameters" section of the documentation}. + + If passing `"auto"`, will be deduced from the type of `y` / `label`:\itemize{ + \item If `y` is not a factor, will set the objective to `"regression"`. + \item If `y` is a factor with two classes, will set the objective to `"binary"`. + \item If `y` is a factor with more than two classes, will set the objective to `"multiclass"`. + } + + If `y` is a factor, then it will automatically set parameter `num_classes` based on + its number of levels, overriding any such entry in `params` if it is present there.} + +\item{nrounds}{number of training rounds} + +\item{nthreads}{Number of parallel threads to use. For best speed, this should be set to the number of + physical cores in the CPU - in a typical x86-64 machine, this corresponds to half the + number of maximum threads (e.g. `nthreads = max(parallel::detectCores() / 2L, 1L)` as + a shorthand for the optimal value). + + Be aware that using too many threads can result in speed degradation in smaller datasets + (see the parameters documentation for more details). + + If passing zero, will use the default number of threads configured for OpenMP. + + This parameter overrides `num_threads` in `params` if it exists there.} \item{params}{a list of parameters. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ the "Parameters" section of the documentation} for a list of parameters and valid values.} -\item{nrounds}{number of training rounds} +\item{dataset_params}{Extra parameters to pass to \link{lgb.Dataset} once it comes the + time to convert the dataset to this library's internal format. + + For a list of the accepted parameters, see + \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#io-parameters}{ + the "I/O Parameters" section of the documentation}.} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -47,34 +183,65 @@ set to the iteration number of the best iteration.} \item{save_name}{File name to use when writing the trained model to disk. Should end in ".model". If passing `NULL`, will not save the trained model to disk.} -\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} - -\item{callbacks}{List of callback functions that are applied at each iteration.} - \item{serializable}{whether to make the resulting objects serializable through functions such as \code{save} or \code{saveRDS} (see section "Model serialization").} -\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example -\itemize{ - \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} - \item{\code{obj}: objective function, can be character or custom objective function. 
Examples include - \code{regression}, \code{regression_l1}, \code{huber}, - \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} - \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} - \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} - \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset} - \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature - names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to - say "the first and tenth columns").} - \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model - into a predictor model which frees up memory and the original datasets} -}} +\item{X}{Data features / covariates / predictors with which the model will try to predict `y`. + + Note that, if using non-standard evaluation for `y`, `weights`, or `init_score` (specifying + them as column names from `X`), then `X` will be subsetted, and any additional parameters + passed that correspond to column indices (such as per-column `max_bin` or + `categorical_features`) will be applied on the subsetted data. + + Supports dense matrices from base R (class `matrix`, will be casted to `double` storage + mode if it isn't already) and sparse matrices in CSC format from the `Matrix` package + (class `dgCMatrix`).} + +\item{y, label}{Target / response variable to predict. May be passed as:\itemize{ + \item The name of a column from `X` / `data`, if it has column names. Will use non-standard + evaluation in order to try to determine if it matches with the name of a column in + `X` / `data` (i.e. will accept it as the name of a column without putting quotes + around it), and can also be passed as a character. + \item A vector with the number of entries matching to the number of rows in `X` / `data`. + } + If passing `objective="auto"`, the optimization objective will be determined according to + the type / class of this variable. + + If `y` is passed as a factor, then `num_class` in `params` will be set automatically + according to its levels. + + Passing `y` as a factor will also make \link{predict.lgb.Booster} use its levels in the + outputs from predictions when appropriate.} } \value{ -a trained \code{lgb.Booster} +A trained \code{lgb.Booster} model object. } \description{ -Simple interface for training a LightGBM model. +Simplified interface for training / fitting a LightGBM model which follows typical + R idioms for model fitting and predictions. Note that this interface does not + expose the full spectrum of library features as \link{lgb.train} does. +} +\details{ +This is a thin wrapper over \link{lgb.Dataset} and then \link{lgb.train} which performs + extra steps such as automatically detecting categorical variables and handling their + encoding. It is intended as an easy-to-use interface that follows common R idioms for + predictive models. + + It uses base R's functions for processing the data, such as `factor`, which are not + particularly efficient - for serious usage, it is recommended to use the \link{lgb.train} + interface with \link{lgb.Dataset} instead, handling aspects such as encoding of categorical + variables externally through your favorite tools. 
+ + \bold{Important:} using the `formula` interface relies on R's own formula handling, which + might be very slow for large inputs and will dummy-encode all categorical variables + (meaning: they will not be treated as categorical in tree splits, rather each level will be + treated as a separate variable, without exploiting the sparsity and independence patterns + in the encoded data). + + When models are produced through this interface (as opposed to \link{lgb.train}), the + method \link{predict.lgb.Booster} will additionally gain new behaviors, such as taking + columns by name from the new input data or adding names to the resulting predicted matrices + (based on the classes or features depending on what is being predicted). } \section{Early Stopping}{ @@ -93,3 +260,16 @@ Simple interface for training a LightGBM model. or \code{objective} (passed into \code{params}). } +\examples{ +library(lightgbm) +data("iris") +model <- lightgbm(Species ~ ., data = iris, verbose = -1L, nthreads = 1L) +pred <- predict(model, iris, type = "class") +all(pred == iris$Species) + +model <- lightgbm(iris, Species, verbose = -1L, nthreads = 1L) +head(predict(model, iris, type = "score")) + +model <- lightgbm(as.matrix(iris[, -5L]), iris$Species, verbose = -1L, nthreads = 1L) +head(predict(model, iris, type = "raw")) +} diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 8948a4b17d01..8b4eaaa40e6b 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -6,23 +6,73 @@ \usage{ \method{predict}{lgb.Booster}( object, - data, + newdata, + type = c("score", "class", "raw", "leaf", "contrib"), start_iteration = NULL, num_iteration = NULL, - rawscore = FALSE, - predleaf = FALSE, - predcontrib = FALSE, header = FALSE, - reshape = FALSE, + reshape = TRUE, + index1 = TRUE, params = list(), ... ) } \arguments{ -\item{object}{Object of class \code{lgb.Booster}} +\item{object}{Object of class \code{lgb.Booster} from which to make predictions.} -\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or -a character representing a path to a text file (CSV, TSV, or LibSVM)} +\item{newdata}{New data on which to make predictions. Allowed types are:\itemize{ + \item `data.frame`, \bold{only if} the model object was produced through the \link{lightgbm} + interface. If the input to \link{lightgbm} was a `formula` or a `data.frame` with + categorical columns (`factor` or `character`), then \bold{only} `data.frame` inputs will + be accepted here. Columns will be taken according to the names that they had in the data + that they were passed to the model (i.e. the input here will be reordered if the order + does not match, and will be subsetted if it has additional columns). + \item `matrix` from base R. Will be converted to numeric if it isn't already. + \item `dgCMatrix` from package `Matrix`. + \item `character` with a single entry representing a path to a text file in CSV, TSV, + or SVMLight / LibSVM formats. + } + Other input types are not allowed. + + Note that, if using the `formula` interface, the user is responsible for making + factor variables' levels match to those that were passed in the data to which the model + was fitted, and if the model was not produced through the \link{lightgbm} interface + (e.g. through \link{lgb.train} or \link{lgb.cv}), then the user is responsible for + handling the encoding of categorical variables.} + +\item{type}{Type of prediction to output. 
Allowed types are:\itemize{
+\item `"score"`, which will output the predicted score according to the objective
+ function being optimized (equivalent to `"response"` in base R's `glm`) - for
+ example, for `objective="binary"`, it will output probabilities, while for
+ `objective="regression"`, it will output predicted values. For objective functions other
+ than multi-class classification, the result will be a numeric vector with length
+ matching `nrow(newdata)`. For multi-class classification, if passing `reshape=TRUE`,
+ it will output a matrix with columns matching the number of classes (and if the model
+ object was produced through the \link{lightgbm} interface instead of through
+ \link{lgb.train} or \link{lgb.cv}, it will have class names as column names if available),
+ and if passing `reshape=FALSE`, it will output a numeric vector with these same results in
+ row-major order.
+ \item `"class"` (only for binary and multi-class classification objectives), which will
+ output the class with the highest predicted score. If the model object was produced through
+ the \link{lightgbm} interface and the label was a factor variable, the result will be a
+ factor variable with levels matching the classes, otherwise it will be an integer vector
+ indicating the class number.
+ \item `"raw"`, which will output the non-transformed numbers (sum of predictions from
+ boosting iterations' results) from which the score is produced for a given objective
+ function - for example, for `objective="binary"`, this corresponds to log-odds. The
+ output type is the same as for `type="score"`.
+ \item `"leaf"`, which will output the index of the terminal node / leaf at which
+ each observation falls in each tree in the model, returned as integers. If passing
+ `reshape=TRUE`, the result will be a matrix with number of columns matching the number
+ of trees, otherwise it will be a vector with this same matrix in row-major order.
+ \item `"contrib"`, which will return the per-feature contributions for each prediction.
+ If passing `reshape=TRUE`, the result will be a matrix with number of columns matching
+ the number of features that the model saw while fitting, otherwise it will be a vector
+ with this same matrix in row-major order. If the model object was produced through
+ the \link{lightgbm} interface, `reshape=TRUE` is passed, and the data to which the model
+ was fit had column names, then the output matrix will have column names corresponding to
+ the feature names.
+ }}

\item{start_iteration}{int or None, optional (default=None)
Start index of the iteration to predict.
@@ -34,37 +84,34 @@
If None, if the best iteration exists and start_iteration is None or <= 0, the
best iteration is used; otherwise, all iterations from start_iteration are used.
If <= 0, all iterations from start_iteration are used (no limits).}

-\item{rawscore}{whether the prediction should be returned in the for of original untransformed
-sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE}
-for logistic regression would result in predictions for log-odds instead of probabilities.}
-
-\item{predleaf}{whether predict leaf index instead.}
-
-\item{predcontrib}{return per-feature contributions for each record.}
-
\item{header}{only used for prediction for text file. True if text file has header}

\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
-prediction outputs per case.}
+prediction outputs per case. When using `reshape=FALSE`, the output will
+be in row-major order (contrary to R matrices, which use column-major order).
+If passing `reshape=TRUE` and `newdata` has row names, the output will also have those
+row names.}
+
+\item{index1}{When producing outputs that correspond to some numbering (such as
+`type="class"` or `type="leaf"`), whether these outputs should be numbered starting
+at 1 or at zero. Note that the underlying lightgbm core library uses zero-based
+numbering, thus `index1=FALSE` will be slightly faster.}

\item{params}{a list of additional named parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
the "Predict Parameters" section of the documentation} for a list of parameters and
valid values.}

-\item{...}{ignored}
+\item{...}{Ignored.}
}
\value{
-For regression or binary classification, it returns a vector of length \code{nrows(data)}.
-  For multiclass classification, either a \code{num_class * nrows(data)} vector or
-  a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
-  the \code{reshape} value.
-
-  When \code{predleaf = TRUE}, the output is a matrix object with the
-  number of columns corresponding to the number of trees.
+Either a matrix with number of rows matching the number of rows in `newdata`, or
+  a vector with number of entries matching the rows in `newdata`, or a vector representing a
+  matrix in row-major order with `nrow(newdata)*n_outputs` entries, depending on the
+  requested `type` and `reshape` parameters.
}
\description{
-Predicted values based on class \code{lgb.Booster}
+Predict values on new data based on a boosting model (class \code{lgb.Booster}).
}
\examples{
\donttest{
diff --git a/R-package/tests/testthat/test_DataProcessor.R b/R-package/tests/testthat/test_DataProcessor.R
new file mode 100644
index 000000000000..5e68299856bb
--- /dev/null
+++ b/R-package/tests/testthat/test_DataProcessor.R
@@ -0,0 +1,380 @@
+# Note: the lgb.DataProcessor class is meant to look for symbols in two
+# environments above the one from which it is called. Thus, it should not be
+# called or tested directly, only as part of calls to lightgbm().
+library(Matrix)
+data("iris")
+data("mtcars")
+data(bank, package = "lightgbm")
+
+test_that("lightgbm() and predict() work with formula interface", {
+  model <- lightgbm(
+    Species ~ .
+    , data = iris
+    , nthreads = 1L
+    , verbose = -1L
+  )
+  pred <- predict(model, iris, type = "class")
+  expect_true(all(pred == iris$Species))
+
+  expect_s3_class(pred, "factor")
+  expect_equal(levels(pred), levels(iris$Species))
+
+  model <- lightgbm(
+    Species ~ .
+ log(Petal.Length) + I(Petal.Length^2.0) - Sepal.Width + , data = iris + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris, type = "class") + expect_true(all(pred == iris$Species)) + expect_equal(5L, .Call(LGBM_BoosterGetNumFeature_R, model$.__enclos_env__$private$handle)) +}) + +test_that("lightgbm() and predict() work with data.frame interface", { + model <- lightgbm( + iris + , Species + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris, type = "class") + expect_true(all(pred == iris$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris$Species)) + + model <- lightgbm( + iris + , "Species" + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris, type = "class") + expect_true(all(pred == iris$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris$Species)) + + model <- lightgbm( + iris[, -5L] + , iris$Species + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris, type = "class") + expect_true(all(pred == iris$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris$Species)) +}) + +test_that("lightgbm() and predict() accept data.tables as data.frames", { + iris_dt <- data.table::as.data.table(iris) + model <- lightgbm( + iris_dt + , Species + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris_dt, type = "class") + expect_true(all(pred == iris_dt$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris_dt$Species)) + + model <- lightgbm( + iris_dt + , "Species" + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris_dt, type = "class") + expect_true(all(pred == iris$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris_dt$Species)) + + model <- lightgbm( + iris_dt[, -5L] + , iris_dt$Species + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris_dt, type = "class") + expect_true(all(pred == iris_dt$Species)) + expect_s3_class(pred, "factor") + expect_equal(levels(pred), levels(iris_dt$Species)) +}) + +test_that("lightgbm() and predict() work with matrix interface", { + model <- lightgbm( + as.matrix(mtcars) + , mpg + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred <- predict(model, mtcars) + expect_true(all(names(pred) == row.names(mtcars))) + + model <- lightgbm( + as.matrix(mtcars) + , "mpg" + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred2 <- predict(model, mtcars) + expect_true(all(pred == pred2)) + + model <- lightgbm( + as.matrix(mtcars[, -1L]) + , mtcars$mpg + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred3 <- predict(model, mtcars) + expect_true(all(pred == pred3)) +}) + +test_that("lightgbm() and predict() work with dgCMatrix interface", { + model <- lightgbm( + as(as.matrix(mtcars), "dgCMatrix") + , mpg + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred <- predict(model, mtcars) + expect_true(all(names(pred) == row.names(mtcars))) + + model <- lightgbm( + as(as.matrix(mtcars), "dgCMatrix") + , "mpg" + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred2 <- predict(model, mtcars) + expect_true(all(pred == pred2)) + + model <- lightgbm( + 
as(as.matrix(mtcars[, -1L]), "dgCMatrix") + , mtcars$mpg + , nthreads = 1L + , verbose = -1L + , nrounds = 5L + , params = list( + max_bins = 5L + , min_data_in_leaf = 5L + ) + ) + pred3 <- predict(model, mtcars) + expect_true(all(pred == pred3)) +}) + +test_that("lightgbm() handles single-column inputs", { + model <- lightgbm( + iris[, 1L, drop = FALSE] + , iris$Species + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, iris, type = "score") + expect_equal(nrow(pred), nrow(iris)) + expect_equal(ncol(pred), 3L) +}) + +test_that("lightbm() data.frame interface handles categorical features", { + model <- lightgbm( + bank + , y + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal( + model$params$categorical_feature + , unname(which(sapply(within(bank, rm(y)), is.character))) + ) +}) + +test_that("lightgbm() accepts dataset parameters", { + set.seed(123L) + df <- data.frame(col1 = c(runif(1000L), rep(0.0, 100L))) + df$col2 <- df$col1 + n_bins <- 5L + model <- lightgbm( + df + , col2 + , nthreads = 1L + , verbose = -1L + , params = list(max_bin = n_bins) + ) + expect_equal(length(table(predict(model, df))), n_bins) + + model <- lightgbm( + df + , col2 + , nthreads = 1L + , verbose = -1L + , dataset_params = list(max_bin = n_bins) + ) + expect_equal(length(table(predict(model, df))), n_bins) +}) + +test_that("lightgbm() accepts NSE for different arguments", { + iris_dt <- data.table::as.data.table(iris) + iris_dt[, wcol := 1.0] + model <- lightgbm( + iris_dt + , "Species" + , weights = wcol + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal( + ncol(iris_dt) - 2L + , .Call(LGBM_BoosterGetNumFeature_R, model$.__enclos_env__$private$handle) + ) +}) + +test_that("lightgbm() does not throw warnings in the presence of NAs", { + df <- data.frame( + col1 = rep(c(1.0, 2.0, NA), 100L) + , col2 = rep(c("a", NA, "b"), 100L) + , col3 = rep(c(1.0, 2.0, 1.0), 100L) + ) + expect_warning({ + model <- lightgbm( + df + , col3 + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + pred <- predict(model, df) + }, regexp = NA) +}) + +test_that("lightgbm() adjusts objective according to data", { + model <- lightgbm( + mpg ~ . + , data = mtcars + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal(model$params$objective, "regression") + + model <- lightgbm( + y ~ . + , data = bank + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal(model$params$objective, "binary") + + model <- lightgbm( + Species ~ . + , data = iris + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal(model$params$objective, "multiclass") + expect_equal(model$params$num_class, length(levels(iris$Species))) + + data("agaricus.train") + model <- lightgbm( + agaricus.train$data + , agaricus.train$label + , objective = "poisson" + , nrounds = 5L + , nthreads = 1L + , verbose = -1L + ) + expect_equal(model$params$objective, "poisson") +}) + +test_that("predict() from lightgbm() names columns correctly", { + model <- lightgbm( + Species ~ . 
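    # multiclass fit on iris: prediction columns are expected to be named after levels(iris$Species)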
+ , data = iris + , nrounds = 1L + , nthreads = 1L + , verbose = -1L + ) + pred_score <- predict(model, iris, type = "score") + pred_class <- predict(model, iris, type = "class") + pred_leaf <- predict(model, iris, type = "leaf") + pred_contrib <- predict(model, iris, type = "contrib") + + expect_equal(colnames(pred_score), levels(iris$Species)) + expect_equal(levels(pred_class), levels(iris$Species)) + expect_null(colnames(pred_leaf)) + expect_null(colnames(pred_contrib)) + + model <- lightgbm( + mpg ~ . + , data = mtcars + , nrounds = 10L + , nthreads = 1L + , verbose = -1L + , params = list( + max_bin = 5L + , min_data_in_leaf = 5L + ) + ) + pred_score <- predict(model, mtcars, type = "score") + expect_error(pred_class <- predict(model, mtcars, type = "class")) + pred_leaf <- predict(model, mtcars, type = "leaf") + pred_contrib <- predict(model, mtcars, type = "contrib") + + expect_true(is.numeric(pred_score)) + expect_null(dim(pred_score)) + expect_null(colnames(pred_score)) + + expect_equal(ncol(pred_leaf), 10L) + expect_null(colnames(pred_leaf)) + expect_equal( + colnames(pred_contrib) + , c(names(mtcars)[names(mtcars) != "mpg"], "(Intercept)") + ) + + model <- lightgbm( + mpg ~ cyl + wt + , data = mtcars + , nrounds = 10L + , nthreads = 1L + , verbose = -1L + , params = list( + max_bin = 5L + , min_data_in_leaf = 5L + ) + ) + pred_contrib <- predict(model, mtcars, type = "contrib") + expect_equal( + colnames(pred_contrib) + , c("cyl", "wt", "(Intercept)") + ) +}) diff --git a/R-package/tests/testthat/test_Predictor.R b/R-package/tests/testthat/test_Predictor.R index 5a1927e4e512..014d285236f3 100644 --- a/R-package/tests/testthat/test_Predictor.R +++ b/R-package/tests/testthat/test_Predictor.R @@ -66,21 +66,22 @@ test_that("start_iteration works correctly", { , label = agaricus.test$label ) bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 0.6 - , objective = "binary" , verbosity = VERBOSITY ) + , objective = "binary" , nrounds = 50L + , nthreads = 1L , valids = list("test" = dtest) , early_stopping_rounds = 2L ) expect_true(lgb.is.Booster(bst)) - pred1 <- predict(bst, data = test$data, rawscore = TRUE) - pred_contrib1 <- predict(bst, test$data, predcontrib = TRUE) + pred1 <- predict(bst, newdata = test$data, type = "raw") + pred_contrib1 <- predict(bst, test$data, type = "contrib") pred2 <- rep(0.0, length(pred1)) pred_contrib2 <- rep(0.0, length(pred2)) step <- 11L @@ -94,7 +95,7 @@ test_that("start_iteration works correctly", { inc_pred <- predict(bst, test$data , start_iteration = start_iter , num_iteration = n_iter - , rawscore = TRUE + , type = "raw" ) inc_pred_contrib <- bst$predict(test$data , start_iteration = start_iter @@ -107,7 +108,7 @@ test_that("start_iteration works correctly", { expect_equal(pred2, pred1) expect_equal(pred_contrib2, pred_contrib1) - pred_leaf1 <- predict(bst, test$data, predleaf = TRUE) - pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, predleaf = TRUE) + pred_leaf1 <- predict(bst, test$data, type = "leaf") + pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, type = "leaf") expect_equal(pred_leaf1, pred_leaf2) }) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 8b6f5f6ceb44..a91e888df9d0 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -72,14 +72,15 @@ 
DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset( test_that("train and predict binary classification", { nrounds <- 10L bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 5L - , objective = "binary" , metric = "binary_error" ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_false(is.null(bst$record_evals)) @@ -102,18 +103,19 @@ test_that("train and predict softmax", { lb <- as.numeric(iris$Species) - 1L bst <- lightgbm( - data = as.matrix(iris[, -5L]) - , label = lb + X = as.matrix(iris[, -5L]) + , y = lb , params = list( num_leaves = 4L , learning_rate = 0.05 , min_data = 20L , min_hessian = 10.0 - , objective = "multiclass" , metric = "multi_error" , num_class = 3L ) + , objective = "multiclass" , nrounds = 20L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) @@ -129,15 +131,16 @@ test_that("train and predict softmax", { test_that("use of multiple eval metrics works", { metrics <- list("binary_error", "auc", "binary_logloss") bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , metric = metrics ) + , objective = "binary" , nrounds = 10L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_false(is.null(bst$record_evals)) @@ -153,14 +156,15 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec set.seed(708L) nrounds <- 10L bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 5L - , objective = "binary" , metric = "binary_error" ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(abs(bst$lower_bound() - -1.590853) < TOLERANCE) @@ -171,14 +175,15 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec set.seed(708L) nrounds <- 10L bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 5L - , objective = "regression" , metric = "l2" ) + , objective = "regression" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(abs(bst$lower_bound() - 0.1513859) < TOLERANCE) @@ -186,15 +191,16 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec }) test_that("lightgbm() rejects negative or 0 value passed to nrounds", { - dtrain <- lgb.Dataset(train$data, label = train$label) - params <- list(objective = "regression", metric = "l2,l1") + params <- list(metric = "l2,l1") for (nround_value in c(-10L, 0L)) { expect_error({ bst <- lightgbm( - data = dtrain + X = train$data + , y = train$label , params = params + , objective = "regression" , nrounds = nround_value - , save_name = tempfile(fileext = ".model") + , nthreads = 1L ) }, "nrounds should be greater than zero") } @@ -205,12 +211,13 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete set.seed(708L) top_level_bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label + , objective = "regression" , nrounds = nrounds + , nthreads = 1L , params = list( - objective = "regression" - , metric = "l2" + metric = "l2" , num_leaves = 5L ) , save_name = tempfile(fileext = ".model") @@ -218,25 +225,27 @@ test_that("lightgbm() accepts nrounds as either a top-level 
argument or paramete set.seed(708L) param_bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( - objective = "regression" - , metric = "l2" + metric = "l2" , num_leaves = 5L , nrounds = nrounds ) + , objective = "regression" + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) set.seed(708L) both_customized <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label + , objective = "regression" , nrounds = 20L + , nthreads = 1L , params = list( - objective = "regression" - , metric = "l2" + metric = "l2" , num_leaves = 5L , nrounds = nrounds ) @@ -274,17 +283,18 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide ) nrounds <- 10L bst <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 5L - , objective = "binary" , metric = c( "binary_error" , "auc" ) ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , valids = list( "valid1" = dvalid1 , "valid2" = dvalid2 @@ -311,10 +321,11 @@ test_that("lightgbm() does not write model to disk if save_name=NULL", { files_before <- list.files(getwd()) model <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , nrounds = 5L - , params = list(objective = "binary") + , nthreads = 1L + , objective = "binary" , verbose = 0L , save_name = NULL ) @@ -790,8 +801,8 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet expect_true(is.numeric(both_l2)) # check that model produces identical performance - expect_identical(top_level_l2, params_l2) - expect_identical(both_l2, params_l2) + expect_equal(top_level_l2, params_l2) + expect_equal(both_l2, params_l2) expect_identical(param_bst$current_iter(), top_level_bst$current_iter()) expect_identical(param_bst$current_iter(), both_customized$current_iter()) @@ -1650,7 +1661,7 @@ test_that("lgb.train() supports non-ASCII feature names", { } }) -test_that("lgb.train() works with integer, double, and numeric data", { +test_that("lightgbm() works with integer, double, and numeric data", { data(mtcars) X <- as.matrix(mtcars[, -1L]) y <- mtcars[, 1L, drop = TRUE] @@ -1659,15 +1670,16 @@ test_that("lgb.train() works with integer, double, and numeric data", { mode(X) <- data_mode nrounds <- 10L bst <- lightgbm( - data = X - , label = y + X = X + , y = y , params = list( - objective = "regression" - , min_data = 1L + min_data = 1L , learning_rate = 0.01 , seed = 708L ) + , objective = "regression" , nrounds = nrounds + , nthreads = 1L ) # should have trained for 10 iterations and found splits @@ -1893,7 +1905,7 @@ test_that("when early stopping is not activated, best_iter and best_score come f expect_identical(bst$best_score, NA_real_) }) -test_that("lightgbm.train() gives the correct best_score and best_iter for a metric where higher values are better", { +test_that("lgb.train() gives the correct best_score and best_iter for a metric where higher values are better", { set.seed(708L) trainDF <- data.frame( "feat1" = runif(n = 500L, min = 0.0, max = 15.0) @@ -1967,16 +1979,18 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com ) nrounds <- 10L bst <- lightgbm( - data = dtrain + X = as.matrix(trainDF[["feat1"]], drop = FALSE) + , y = trainDF[["target"]] + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , valids = list( "valid1" = dvalid1 , "something-random-we-would-not-hardcode" = dtrain , "valid2" 
= dvalid2 ) , params = list( - objective = "binary" - , metric = "auc" + metric = "auc" , learning_rate = 1.5 , num_leaves = 5L ) @@ -1987,7 +2001,7 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com # untouched. If you set verbose to > 0, the training data will still be first but called "train" expect_named( bst$record_evals - , c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2") + , c("start_iter", "valid1", "something-random-we-would-not-hardcode", "valid2") , ignore.order = FALSE , ignore.case = FALSE ) @@ -2516,7 +2530,7 @@ test_that("lgb.train() throws an informative error if interaction_constraints is dtrain <- lgb.Dataset(train$data, label = train$label) params <- list(objective = "regression", interaction_constraints = "[1,2],[3]") expect_error({ - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2530,9 +2544,12 @@ test_that(paste0("lgb.train() throws an informative error if the members of inte params <- list(objective = "regression", interaction_constraints = list(list(1L, 2L), list(3L))) expect_error({ bst <- lightgbm( - data = dtrain + X = train$data + , y = train$label , params = params , nrounds = 2L + , objective = "regression" + , nthreads = 1L ) }, "every element in interaction_constraints must be a character vector or numeric vector") }) @@ -2542,7 +2559,7 @@ test_that("lgb.train() throws an informative error if interaction_constraints co params <- list(objective = "regression", interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L)) expect_error({ - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2556,7 +2573,7 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is dtrain <- lgb.Dataset(train$data, label = train$label) params <- list(objective = "regression", interaction_constraints = list(c(1L, 2L), 3L)) - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2565,7 +2582,7 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is cnames <- colnames(train$data) params <- list(objective = "regression", interaction_constraints = list(c(cnames[[1L]], cnames[[2L]]), cnames[[3L]])) - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2573,7 +2590,7 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is pred2 <- bst$predict(test$data) params <- list(objective = "regression", interaction_constraints = list(c(cnames[[1L]], cnames[[2L]]), 3L)) - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2590,7 +2607,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai dtrain <- lgb.Dataset(train$data, label = train$label) params <- list(objective = "regression", interaction_constraints = list(c(1L, 2L), 3L)) - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L @@ -2600,7 +2617,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai new_colnames <- paste0(colnames(train$data), "_x") params <- list(objective = "regression" , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])) - bst <- lightgbm( + bst <- lgb.train( data = dtrain , params = params , nrounds = 2L diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 77a5ce402238..1308413660b1 100644 --- 
a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -127,8 +127,8 @@ test_that("lgb.load() gives the expected error messages given different incorrec train <- agaricus.train test <- agaricus.test bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( objective = "binary" , num_leaves = 4L @@ -136,6 +136,7 @@ test_that("lgb.load() gives the expected error messages given different incorrec , verbose = VERBOSITY ) , nrounds = 2L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) @@ -174,8 +175,8 @@ test_that("Loading a Booster from a text file works", { train <- agaricus.train test <- agaricus.test bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 @@ -183,6 +184,7 @@ test_that("Loading a Booster from a text file works", { , verbose = VERBOSITY ) , nrounds = 2L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -251,8 +253,8 @@ test_that("Loading a Booster from a string works", { train <- agaricus.train test <- agaricus.test bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 @@ -260,6 +262,7 @@ test_that("Loading a Booster from a string works", { , verbose = VERBOSITY ) , nrounds = 2L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -284,21 +287,22 @@ test_that("Saving a large model to string should work", { data(agaricus.train, package = "lightgbm") train <- agaricus.train bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 100L , learning_rate = 0.01 , objective = "binary" ) , nrounds = 500L + , nthreads = 1L , save_name = tempfile(fileext = ".model") , verbose = VERBOSITY ) pred <- predict(bst, train$data) - pred_leaf_indx <- predict(bst, train$data, predleaf = TRUE) - pred_raw_score <- predict(bst, train$data, rawscore = TRUE) + pred_leaf_indx <- predict(bst, train$data, type = "leaf") + pred_raw_score <- predict(bst, train$data, type = "raw") model_string <- bst$save_model_to_string() # make sure this test is still producing a model bigger than the default @@ -316,8 +320,8 @@ test_that("Saving a large model to string should work", { model_str = model_string ) pred2 <- predict(bst2, train$data) - pred2_leaf_indx <- predict(bst2, train$data, predleaf = TRUE) - pred2_raw_score <- predict(bst2, train$data, rawscore = TRUE) + pred2_leaf_indx <- predict(bst2, train$data, type = "leaf") + pred2_raw_score <- predict(bst2, train$data, type = "raw") expect_identical(pred, pred2) expect_identical(pred_leaf_indx, pred2_leaf_indx) expect_identical(pred_raw_score, pred2_raw_score) @@ -328,14 +332,15 @@ test_that("Saving a large model to JSON should work", { data(agaricus.train, package = "lightgbm") train <- agaricus.train bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 100L , learning_rate = 0.01 - , objective = "binary" ) + , objective = "binary" , nrounds = 200L + , nthreads = 1L , save_name = tempfile(fileext = ".model") , verbose = VERBOSITY ) @@ -358,15 +363,16 @@ test_that("If a string and a file are both passed to 
lgb.load() the file is used train <- agaricus.train test <- agaricus.test bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = 2L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -414,15 +420,16 @@ test_that("Creating a Booster from a Dataset with an existing predictor should w data(agaricus.train, package = "lightgbm") nrounds <- 2L bst <- lightgbm( - data = as.matrix(agaricus.train$data) - , label = agaricus.train$label + X = as.matrix(agaricus.train$data) + , y = agaricus.train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) data(agaricus.test, package = "lightgbm") @@ -508,15 +515,16 @@ test_that("Booster$rollback_one_iter() should work as expected", { test <- agaricus.test nrounds <- 5L bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_equal(bst$current_iter(), nrounds) @@ -543,15 +551,16 @@ test_that("Booster$update() passing a train_set works as expected", { # train with 2 rounds and then update bst <- lightgbm( - data = as.matrix(agaricus.train$data) - , label = agaricus.train$label + X = as.matrix(agaricus.train$data) + , y = agaricus.train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -567,15 +576,16 @@ test_that("Booster$update() passing a train_set works as expected", { # train with 3 rounds directly bst2 <- lightgbm( - data = as.matrix(agaricus.train$data) - , label = agaricus.train$label + X = as.matrix(agaricus.train$data) + , y = agaricus.train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = nrounds + 1L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst2)) @@ -593,15 +603,16 @@ test_that("Booster$update() throws an informative error if you provide a non-Dat # train with 2 rounds and then update bst <- lightgbm( - data = as.matrix(agaricus.train$data) - , label = agaricus.train$label + X = as.matrix(agaricus.train$data) + , y = agaricus.train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = nrounds + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_error({ @@ -689,15 +700,16 @@ test_that("Saving a model with different feature importance types works", { data(agaricus.train, package = "lightgbm") train <- agaricus.train bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = 2L + , nthreads = 1L , save_name = 
tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -745,15 +757,16 @@ test_that("Saving a model with unknown importance type fails", { data(agaricus.train, package = "lightgbm") train <- agaricus.train bst <- lightgbm( - data = as.matrix(train$data) - , label = train$label + X = as.matrix(train$data) + , y = train$label , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" , verbose = VERBOSITY ) + , objective = "binary" , nrounds = 2L + , nthreads = 1L , save_name = tempfile(fileext = ".model") ) expect_true(lgb.is.Booster(bst)) @@ -1097,8 +1110,9 @@ test_that("Handle is automatically restored when calling predict", { bst <- lightgbm( agaricus.train$data , agaricus.train$label + , objective = "binary" , nrounds = 5L - , obj = "binary" + , nthreads = 1L , params = list( verbose = VERBOSITY ) diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index 1b03f09aa379..f4a898eafde2 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -12,16 +12,17 @@ test_that("Feature penalties work properly", { feature_penalties <- rep(1.0, ncol(train$data)) feature_penalties[var_index] <- x lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = list( num_leaves = 5L , learning_rate = 0.05 - , objective = "binary" , feature_penalty = paste0(feature_penalties, collapse = ",") , metric = "binary_error" ) + , objective = "binary" , nrounds = 5L + , nthreads = 1L , verbose = -1L , save_name = tempfile(fileext = ".model") ) @@ -64,16 +65,17 @@ test_that("training should warn if you use 'dart' boosting, specified with 'boos params <- list( num_leaves = 5L , learning_rate = 0.05 - , objective = "binary" , metric = "binary_error" ) params[[boosting_param]] <- "dart" expect_warning({ result <- lightgbm( - data = train$data - , label = train$label + X = train$data + , y = train$label , params = params + , objective = "binary" , nrounds = 5L + , nthreads = 1L , verbose = -1L , save_name = tempfile(fileext = ".model") ) diff --git a/R-package/vignettes/basic_walkthrough.Rmd b/R-package/vignettes/basic_walkthrough.Rmd index d7aaf676f386..3379a49f76dc 100644 --- a/R-package/vignettes/basic_walkthrough.Rmd +++ b/R-package/vignettes/basic_walkthrough.Rmd @@ -51,22 +51,37 @@ The R package of LightGBM offers two functions to train a model: ### Using the `lightgbm()` function -In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. +The `lightgbm()` function provides a formula interface as well as an X/y interface. As a +first step, the variable to predict needs to be converted to a `factor`. ```{r} # Numeric response and feature matrix +bank$y <- factor(bank$y) + +# Train with formula interface +fit <- lightgbm( + y ~ age + balance + , data = bank + , params = list( + num_leaves = 4L + , learning_rate = 1.0 + ) + , nrounds = 10L + , verbose = -1L +) + +# Train with X/y interface y <- as.numeric(bank$y == "yes") X <- data.matrix(bank[, c("age", "balance")]) -# Train fit <- lightgbm( - data = X - , label = y + X + , y , params = list( num_leaves = 4L , learning_rate = 1.0 - , objective = "binary" ) + , objective = "binary" , nrounds = 10L , verbose = -1L )
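# --- Editor's illustrative sketch (not part of the original patch) ---
# Assuming the new predict() interface behaves as documented above, predictions
# for new rows of the X/y model could be obtained as follows; the `type` values
# are taken from the documentation and may differ in the final implementation.
X_new <- data.matrix(bank[1L:5L, c("age", "balance")])
predict(fit, X_new, type = "score")  # predicted probabilities of y == "yes"
predict(fit, X_new, type = "raw")    # the same predictions on the log-odds scale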