From 151bf0703acbdfbb137ceb9b016c235e54150113 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Tue, 14 Apr 2020 16:16:51 +0300
Subject: [PATCH 01/20] Update CODEOWNERS (#2996)

---
 .github/CODEOWNERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6002b2dafaa5..fdcce7242e00 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -10,9 +10,9 @@
 * @guolinke @StrikerRUS @jameslamb @Laurae2

 # main C++ code
-include/ @guolinke @chivee
-src/ @guolinke @chivee
-CmakeLists.txt @guolinke @chivee @Laurae2 @jameslamb @wxchan @henry0312 @StrikerRUS @huanzhang12
+include/ @guolinke @chivee @btrotta
+src/ @guolinke @chivee @btrotta
+CmakeLists.txt @guolinke @chivee @Laurae2 @jameslamb @wxchan @henry0312 @StrikerRUS @huanzhang12 @btrotta

 # R code
 include/LightGBM/lightgbm_R.h @Laurae2 @jameslamb

From 181616748bf4a27aa7b9ec7ae340f6eadbc1b78e Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 15 Apr 2020 01:17:56 +0100
Subject: [PATCH 02/20] [R-package] [docs] Simplified examples to cut example run time (fixes #2988) (#2989)

* [R-package] [docs] Simplified examples to cut example run time (fixes #2988)

* updated learning rates
---
 R-package/R/lgb.Booster.R                    | 21 +++++------
 R-package/R/lgb.Dataset.R                    | 11 ------
 R-package/R/lgb.cv.R                         |  4 +-
 R-package/R/lgb.importance.R                 | 10 +++--
 R-package/R/lgb.interprete.R                 | 10 +++--
 R-package/R/lgb.plot.importance.R            | 12 +++---
 R-package/R/lgb.plot.interpretation.R        | 39 ++++++++++++++------
 R-package/R/lgb.prepare.R                    |  1 -
 R-package/R/lgb.prepare2.R                   |  1 -
 R-package/R/lgb.prepare_rules.R              |  1 -
 R-package/R/lgb.prepare_rules2.R             |  1 -
 R-package/R/lgb.train.R                      |  5 +--
 R-package/R/lgb.unloader.R                   |  4 +-
 R-package/R/readRDS.lgb.Booster.R            |  3 +-
 R-package/R/saveRDS.lgb.Booster.R            |  2 +
 R-package/man/dim.Rd                         |  1 -
 R-package/man/dimnames.lgb.Dataset.Rd        |  1 -
 R-package/man/getinfo.Rd                     |  1 -
 R-package/man/lgb.Dataset.Rd                 |  1 -
 R-package/man/lgb.Dataset.construct.Rd       |  1 -
 R-package/man/lgb.Dataset.create.valid.Rd    |  1 -
 R-package/man/lgb.Dataset.save.Rd            |  1 -
 R-package/man/lgb.Dataset.set.categorical.Rd |  1 -
 R-package/man/lgb.Dataset.set.reference.Rd   |  1 -
 R-package/man/lgb.cv.Rd                      |  4 +-
 R-package/man/lgb.dump.Rd                    |  3 +-
 R-package/man/lgb.get.eval.result.Rd         |  4 +-
 R-package/man/lgb.importance.Rd              | 10 +++--
 R-package/man/lgb.interprete.Rd              | 10 +++--
 R-package/man/lgb.load.Rd                    |  8 ++--
 R-package/man/lgb.plot.importance.Rd         | 12 +++---
 R-package/man/lgb.plot.interpretation.Rd     | 39 ++++++++++++++------
 R-package/man/lgb.prepare.Rd                 |  1 -
 R-package/man/lgb.prepare2.Rd                |  1 -
 R-package/man/lgb.prepare_rules.Rd           |  1 -
 R-package/man/lgb.prepare_rules2.Rd          |  1 -
 R-package/man/lgb.save.Rd                    |  2 +
 R-package/man/lgb.train.Rd                   |  5 +--
 R-package/man/lgb.unloader.Rd                |  4 +-
 R-package/man/lgb_shared_params.Rd           |  3 +-
 R-package/man/lightgbm.Rd                    |  3 +-
 R-package/man/predict.lgb.Booster.Rd         |  4 +-
 R-package/man/readRDS.lgb.Booster.Rd         |  3 +-
 R-package/man/saveRDS.lgb.Booster.Rd         |  2 +
 R-package/man/setinfo.Rd                     |  1 -
 R-package/man/slice.Rd                       |  1 -
 46 files changed, 132 insertions(+), 124 deletions(-)

diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index f7aa4d10f49d..3e1bd0de0b78 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -711,7 +711,6 @@ Booster <- R6::R6Class(
 #' number of columns corresponding to the number of trees.
#' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -723,11 +722,10 @@ Booster <- R6::R6Class( #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , valids = valids #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L #' ) #' preds <- predict(model, test$data) #' @export @@ -769,7 +767,7 @@ predict.lgb.Booster <- function(object, #' @return lgb.Booster #' #' @examples -#' library(lightgbm) +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -781,17 +779,17 @@ predict.lgb.Booster <- function(object, #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , valids = valids #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L +#' , early_stopping_rounds = 3L #' ) #' lgb.save(model, "model.txt") #' load_booster <- lgb.load(filename = "model.txt") #' model_string <- model$save_model_to_string(NULL) # saves best iteration #' load_booster_from_str <- lgb.load(model_str = model_string) -#' +#' } #' @export lgb.load <- function(filename = NULL, model_str = NULL) { @@ -828,6 +826,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' @return lgb.Booster #' #' @examples +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -847,6 +846,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' , early_stopping_rounds = 5L #' ) #' lgb.save(model, "model.txt") +#' } #' @export lgb.save <- function(booster, filename, num_iteration = NULL) { @@ -874,6 +874,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @return json format of model #' #' @examples +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -893,7 +894,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' , early_stopping_rounds = 5L #' ) #' json_model <- lgb.dump(model) -#' +#' } #' @export lgb.dump <- function(booster, num_iteration = NULL) { @@ -922,7 +923,6 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' #' @examples #' # train a regression model -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -934,11 +934,10 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , valids = valids #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L #' ) #' #' # Examine valid data_name values diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index c361a6c423c3..22afc2666718 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -725,7 +725,6 @@ Dataset <- R6::R6Class( #' @return constructed dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -770,7 +769,6 @@ lgb.Dataset <- function(data, #' @return constructed dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -797,7 +795,6 @@ 
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { #' @param dataset Object of class \code{lgb.Dataset} #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -828,7 +825,6 @@ lgb.Dataset.construct <- function(dataset) { #' be directly used with an \code{lgb.Dataset} object. #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -863,7 +859,6 @@ dim.lgb.Dataset <- function(x, ...) { #' Since row names are irrelevant, it is recommended to use \code{colnames} directly. #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -936,7 +931,6 @@ dimnames.lgb.Dataset <- function(x) { #' @return constructed sub dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -983,7 +977,6 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { #' } #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1037,7 +1030,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { #' } #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1078,7 +1070,6 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { #' @return passed dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1109,7 +1100,6 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' @return passed dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package ="lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1141,7 +1131,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' @return passed dataset #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 3433aade6594..90e54773c786 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -56,7 +56,6 @@ CVBooster <- R6::R6Class( #' @return a trained model \code{lgb.CVBooster}. 
#' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -64,11 +63,10 @@ CVBooster <- R6::R6Class( #' model <- lgb.cv( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , nfold = 3L #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L #' ) #' @importFrom data.table data.table setorderv #' @export diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index acef193ebbc4..3064673f664a 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -13,20 +13,22 @@ #' } #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' #' params <- list( #' objective = "binary" -#' , learning_rate = 0.01 -#' , num_leaves = 63L +#' , learning_rate = 0.1 #' , max_depth = -1L #' , min_data_in_leaf = 1L #' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10L) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 5L +#' ) #' #' tree_imp1 <- lgb.importance(model, percentage = TRUE) #' tree_imp2 <- lgb.importance(model, percentage = FALSE) diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index eb0ecd94a6a1..e97fb1b590a1 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -16,7 +16,6 @@ #' Contribution columns to each class. #' #' @examples -#' Sigmoid <- function(x) 1.0 / (1.0 + exp(-x)) #' Logit <- function(x) log(x / (1.0 - x)) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -27,13 +26,16 @@ #' #' params <- list( #' objective = "binary" -#' , learning_rate = 0.01 -#' , num_leaves = 63L +#' , learning_rate = 0.1 #' , max_depth = -1L #' , min_data_in_leaf = 1L #' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10L) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 3L +#' ) #' #' tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) #' diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index b05dac39b91a..ec496c4213f3 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -24,17 +24,19 @@ #' #' params <- list( #' objective = "binary" -#' , learning_rate = 0.01 -#' , num_leaves = 63L -#' , max_depth = -1L +#' , learning_rate = 0.1 #' , min_data_in_leaf = 1L #' , min_sum_hessian_in_leaf = 1.0 #' ) #' -#' model <- lgb.train(params, dtrain, 10L) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 5L +#' ) #' #' tree_imp <- lgb.importance(model, percentage = TRUE) -#' lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain") +#' lgb.plot.importance(tree_imp, top_n = 5L, measure = "Gain") #' @importFrom graphics barplot par #' @export lgb.plot.importance <- function(tree_imp, diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 2914ddf94f97..0efd71ccc929 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -15,28 +15,43 @@ #' The \code{lgb.plot.interpretation} function creates a \code{barplot}. 
#' #' @examples -#' library(lightgbm) -#' Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))} -#' Logit <- function(x) {log(x / (1.0 - x))} +#' \donttest{ +#' Logit <- function(x) { +#' log(x / (1.0 - x)) +#' } #' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label))) +#' labels <- agaricus.train$label +#' dtrain <- lgb.Dataset( +#' agaricus.train$data +#' , label = labels +#' ) +#' setinfo(dtrain, "init_score", rep(Logit(mean(labels)), length(labels))) +#' #' data(agaricus.test, package = "lightgbm") -#' test <- agaricus.test #' #' params <- list( #' objective = "binary" -#' , learning_rate = 0.01 -#' , num_leaves = 63L +#' , learning_rate = 0.1 #' , max_depth = -1L #' , min_data_in_leaf = 1L #' , min_sum_hessian_in_leaf = 1.0 #' ) -#' model <- lgb.train(params, dtrain, 10L) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 5L +#' ) #' -#' tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) -#' lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L) +#' tree_interpretation <- lgb.interprete( +#' model = model +#' , data = agaricus.test$data +#' , idxset = 1L:5L +#' ) +#' lgb.plot.interpretation( +#' tree_interpretation_dt = tree_interpretation[[1L]] +#' , top_n = 5L +#' ) +#' } #' @importFrom data.table setnames #' @importFrom graphics barplot par #' @export diff --git a/R-package/R/lgb.prepare.R b/R-package/R/lgb.prepare.R index 42a9daa0d434..863271e06040 100644 --- a/R-package/R/lgb.prepare.R +++ b/R-package/R/lgb.prepare.R @@ -8,7 +8,6 @@ #' for input in \code{lgb.Dataset}. #' #' @examples -#' library(lightgbm) #' data(iris) #' #' str(iris) diff --git a/R-package/R/lgb.prepare2.R b/R-package/R/lgb.prepare2.R index f2fdc89d0c2a..0d7179ed9496 100644 --- a/R-package/R/lgb.prepare2.R +++ b/R-package/R/lgb.prepare2.R @@ -11,7 +11,6 @@ #' for input in \code{lgb.Dataset}. #' #' @examples -#' library(lightgbm) #' data(iris) #' #' str(iris) diff --git a/R-package/R/lgb.prepare_rules.R b/R-package/R/lgb.prepare_rules.R index e6efe89ab25f..307a69e32a38 100644 --- a/R-package/R/lgb.prepare_rules.R +++ b/R-package/R/lgb.prepare_rules.R @@ -10,7 +10,6 @@ #' in \code{lgb.Dataset}. #' #' @examples -#' library(lightgbm) #' data(iris) #' #' str(iris) diff --git a/R-package/R/lgb.prepare_rules2.R b/R-package/R/lgb.prepare_rules2.R index dab2ae5f5271..62688a765b47 100644 --- a/R-package/R/lgb.prepare_rules2.R +++ b/R-package/R/lgb.prepare_rules2.R @@ -13,7 +13,6 @@ #' \code{lgb.Dataset}. #' #' @examples -#' library(lightgbm) #' data(iris) #' #' str(iris) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index d0dacecc0bd1..c41f32e15c8e 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -29,7 +29,6 @@ #' @return a trained booster model \code{lgb.Booster}. 
#' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -41,11 +40,11 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , valids = valids #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L +#' , early_stopping_rounds = 3L #' ) #' @export lgb.train <- function(params = list(), diff --git a/R-package/R/lgb.unloader.R b/R-package/R/lgb.unloader.R index cb80e2f01ff6..aaafca019358 100644 --- a/R-package/R/lgb.unloader.R +++ b/R-package/R/lgb.unloader.R @@ -14,7 +14,6 @@ #' @return NULL invisibly. #' #' @examples -#' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -26,11 +25,10 @@ #' model <- lgb.train( #' params = params #' , data = dtrain -#' , nrounds = 10L +#' , nrounds = 5L #' , valids = valids #' , min_data = 1L #' , learning_rate = 1.0 -#' , early_stopping_rounds = 5L #' ) #' #' \dontrun{ diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index e68dd8c963de..f0c862f33c74 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -7,6 +7,7 @@ #' @return \code{lgb.Booster}. #' #' @examples +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -27,7 +28,7 @@ #' ) #' saveRDS.lgb.Booster(model, "model.rds") #' new_model <- readRDS.lgb.Booster("model.rds") -#' +#' } #' @export readRDS.lgb.Booster <- function(file = "", refhook = NULL) { diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 21bd8483628f..855e1e1b6c8e 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -18,6 +18,7 @@ #' @return NULL invisibly. #' #' @examples +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -37,6 +38,7 @@ #' , early_stopping_rounds = 5L #' ) #' saveRDS.lgb.Booster(model, "model.rds") +#' } #' @export saveRDS.lgb.Booster <- function(object, file = "", diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd index 4fdb64252f7e..55fde26d6a5b 100644 --- a/R-package/man/dim.Rd +++ b/R-package/man/dim.Rd @@ -22,7 +22,6 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also be directly used with an \code{lgb.Dataset} object. } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index 5f85ff12bd6f..22be85149646 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -24,7 +24,6 @@ Generic \code{dimnames} methods are used by \code{colnames}. Since row names are irrelevant, it is recommended to use \code{colnames} directly. 
} \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 29254d8622f3..2925308ed7e9 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -33,7 +33,6 @@ The \code{name} field can be one of the following: } } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index b6cbb5327a14..fb1d1067a53e 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -40,7 +40,6 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix or local file (that was created previously by saving an \code{lgb.Dataset}). } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd index 23dfc0e9f67b..4338f84b669c 100644 --- a/R-package/man/lgb.Dataset.construct.Rd +++ b/R-package/man/lgb.Dataset.construct.Rd @@ -13,7 +13,6 @@ lgb.Dataset.construct(dataset) Construct Dataset explicitly } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd index e48c93772a39..0669f1887171 100644 --- a/R-package/man/lgb.Dataset.create.valid.Rd +++ b/R-package/man/lgb.Dataset.create.valid.Rd @@ -22,7 +22,6 @@ constructed dataset Construct validation data according to training data } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd index d8446f030936..26895999d11a 100644 --- a/R-package/man/lgb.Dataset.save.Rd +++ b/R-package/man/lgb.Dataset.save.Rd @@ -19,7 +19,6 @@ Please note that \code{init_score} is not saved in binary file. If you need it, please set it again after loading Dataset. } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index e1f03cfbf9e6..0ab44b56bb0d 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -21,7 +21,6 @@ Set the categorical features of an \code{lgb.Dataset} object. Use this function to tell LightGBM which features should be treated as categorical. 
} \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd index fabe7c03e6fd..e8bd41820286 100644 --- a/R-package/man/lgb.Dataset.set.reference.Rd +++ b/R-package/man/lgb.Dataset.set.reference.Rd @@ -18,7 +18,6 @@ passed dataset If you want to use validation data, you should set reference to training data } \examples{ -library(lightgbm) data(agaricus.train, package ="lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 01473339cf6f..673392f54568 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -100,7 +100,6 @@ a trained model \code{lgb.CVBooster}. Cross validation logic used by LightGBM } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -108,10 +107,9 @@ params <- list(objective = "regression", metric = "l2") model <- lgb.cv( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , nfold = 3L , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L ) } diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index 828ba4ac7ea9..6fbc5cbe9b43 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -18,6 +18,7 @@ json format of model Dump LightGBM model to json } \examples{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -37,5 +38,5 @@ model <- lgb.train( , early_stopping_rounds = 5L ) json_model <- lgb.dump(model) - +} } diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index c5473825e61a..5707d8ccb6c4 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -33,7 +33,6 @@ Given a \code{lgb.Booster}, return evaluation results for a } \examples{ # train a regression model -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -45,11 +44,10 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , valids = valids , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L ) # Examine valid data_name values diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 3d6c1fae5217..5a269407859f 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -24,20 +24,22 @@ For a tree model, a \code{data.table} with the following columns: Creates a \code{data.table} of feature importances in a model. 
} \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) params <- list( objective = "binary" - , learning_rate = 0.01 - , num_leaves = 63L + , learning_rate = 0.1 , max_depth = -1L , min_data_in_leaf = 1L , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10L) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 5L +) tree_imp1 <- lgb.importance(model, percentage = TRUE) tree_imp2 <- lgb.importance(model, percentage = FALSE) diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index aa8aedf156f4..86fb8ecb515b 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -29,7 +29,6 @@ For regression, binary classification and lambdarank model, a \code{list} of \co Computes feature contribution components of rawscore prediction. } \examples{ -Sigmoid <- function(x) 1.0 / (1.0 + exp(-x)) Logit <- function(x) log(x / (1.0 - x)) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -40,13 +39,16 @@ test <- agaricus.test params <- list( objective = "binary" - , learning_rate = 0.01 - , num_leaves = 63L + , learning_rate = 0.1 , max_depth = -1L , min_data_in_leaf = 1L , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10L) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 3L +) tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index c2b1500e9bc8..5f7c2354733e 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -19,7 +19,7 @@ Load LightGBM takes in either a file path or model string. If both are provided, Load will default to loading from file } \examples{ -library(lightgbm) +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -31,15 +31,15 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , valids = valids , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L + , early_stopping_rounds = 3L ) lgb.save(model, "model.txt") load_booster <- lgb.load(filename = "model.txt") model_string <- model$save_model_to_string(NULL) # saves best iteration load_booster_from_str <- lgb.load(model_str = model_string) - +} } diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index 97775efd704d..024077a08409 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -43,15 +43,17 @@ dtrain <- lgb.Dataset(train$data, label = train$label) params <- list( objective = "binary" - , learning_rate = 0.01 - , num_leaves = 63L - , max_depth = -1L + , learning_rate = 0.1 , min_data_in_leaf = 1L , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10L) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 5L +) tree_imp <- lgb.importance(model, percentage = TRUE) -lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain") +lgb.plot.importance(tree_imp, top_n = 5L, measure = "Gain") } diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index f5fa6497f2c0..b8818eecdbc0 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -34,26 +34,41 @@ The graph represents each feature as a horizontal bar of length proportional to 
contribution of a feature. Features are shown ranked in a decreasing contribution order. } \examples{ -library(lightgbm) -Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))} -Logit <- function(x) {log(x / (1.0 - x))} +\donttest{ +Logit <- function(x) { + log(x / (1.0 - x)) +} data(agaricus.train, package = "lightgbm") -train <- agaricus.train -dtrain <- lgb.Dataset(train$data, label = train$label) -setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label))) +labels <- agaricus.train$label +dtrain <- lgb.Dataset( + agaricus.train$data + , label = labels +) +setinfo(dtrain, "init_score", rep(Logit(mean(labels)), length(labels))) + data(agaricus.test, package = "lightgbm") -test <- agaricus.test params <- list( objective = "binary" - , learning_rate = 0.01 - , num_leaves = 63L + , learning_rate = 0.1 , max_depth = -1L , min_data_in_leaf = 1L , min_sum_hessian_in_leaf = 1.0 ) -model <- lgb.train(params, dtrain, 10L) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 5L +) -tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) -lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L) +tree_interpretation <- lgb.interprete( + model = model + , data = agaricus.test$data + , idxset = 1L:5L +) +lgb.plot.interpretation( + tree_interpretation_dt = tree_interpretation[[1L]] + , top_n = 5L +) +} } diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd index dc1fed72e698..db726b15d36a 100644 --- a/R-package/man/lgb.prepare.Rd +++ b/R-package/man/lgb.prepare.Rd @@ -19,7 +19,6 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets. } \examples{ -library(lightgbm) data(iris) str(iris) diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd index e4eaf53df2f6..eef44758f42b 100644 --- a/R-package/man/lgb.prepare2.Rd +++ b/R-package/man/lgb.prepare2.Rd @@ -22,7 +22,6 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. input. Consider this as a half memory technique which is dangerous, especially for LightGBM. } \examples{ -library(lightgbm) data(iris) str(iris) diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index a766b7f26af6..69821ef7d240 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -22,7 +22,6 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. so you can convert other datasets using this converter. } \examples{ -library(lightgbm) data(iris) str(iris) diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index b19f275f2009..7fe3de7c4929 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -25,7 +25,6 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Consider this as a half memory technique which is dangerous, especially for LightGBM. 
} \examples{ -library(lightgbm) data(iris) str(iris) diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index 70bd098a0913..91e2befb738f 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -20,6 +20,7 @@ lgb.Booster Save LightGBM model } \examples{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -40,3 +41,4 @@ model <- lgb.train( ) lgb.save(model, "model.txt") } +} diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 98298ab6f954..b471e0c7601f 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -83,7 +83,6 @@ a trained booster model \code{lgb.Booster}. Logic to train with LightGBM } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -95,10 +94,10 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , valids = valids , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L + , early_stopping_rounds = 3L ) } diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd index 758a831ee3dd..ca69c08b602c 100644 --- a/R-package/man/lgb.unloader.Rd +++ b/R-package/man/lgb.unloader.Rd @@ -26,7 +26,6 @@ Attempts to unload LightGBM packages so you can remove objects cleanly without apparent reason and you do not want to restart R to fix the lost object. } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -38,11 +37,10 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , valids = valids , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L ) \dontrun{ diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index ae2f61a86256..a7143179495e 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -4,7 +4,8 @@ \alias{lgb_shared_params} \title{Shared parameter docs} \arguments{ -\item{callbacks}{List of callback functions that are applied at each iteration.} +\item{callbacks}{list of callback functions +List of callback functions that are applied at each iteration.} \item{data}{a \code{lgb.Dataset} object, used for training. Some functions, such as \code{\link{lgb.cv}}, may allow you to pass other types of data like \code{matrix} and then separately supply diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 88d98d13525d..256a7dc6e8e9 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -45,7 +45,8 @@ If early stopping occurs, the model will have 'best_iter' field.} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} -\item{callbacks}{List of callback functions that are applied at each iteration.} +\item{callbacks}{list of callback functions +List of callback functions that are applied at each iteration.} \item{...}{Additional arguments passed to \code{\link{lgb.train}}. 
For example \itemize{ diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 985cd763689a..40444cbff7be 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -52,7 +52,6 @@ For regression or binary classification, it returns a vector of length \code{nro Predicted values based on class \code{lgb.Booster} } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -64,11 +63,10 @@ valids <- list(test = dtest) model <- lgb.train( params = params , data = dtrain - , nrounds = 10L + , nrounds = 5L , valids = valids , min_data = 1L , learning_rate = 1.0 - , early_stopping_rounds = 5L ) preds <- predict(model, test$data) } diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 809333aef0dc..be03fd1cfcb8 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -18,6 +18,7 @@ readRDS.lgb.Booster(file = "", refhook = NULL) Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}} } \examples{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -38,5 +39,5 @@ model <- lgb.train( ) saveRDS.lgb.Booster(model, "model.rds") new_model <- readRDS.lgb.Booster("model.rds") - +} } diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index 2d1fbb636a93..3e7bb368d878 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -42,6 +42,7 @@ Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not. } \examples{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -62,3 +63,4 @@ model <- lgb.train( ) saveRDS.lgb.Booster(model, "model.rds") } +} diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index 74d18673a8a1..344f79cc4621 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -38,7 +38,6 @@ The \code{name} field can be one of the following: } } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 6eaec3daeab2..90c837f222ab 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -24,7 +24,6 @@ Get a new \code{lgb.Dataset} containing the specified rows of original \code{lgb.Dataset} object } \examples{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) From 381c2970a4b6ee706f56255c364bd5348b7c14e3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 15 Apr 2020 04:37:36 +0100 Subject: [PATCH 03/20] [R-package] Removed unused constant TYPE_BITS (#2994) --- include/LightGBM/R_object_helper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/LightGBM/R_object_helper.h b/include/LightGBM/R_object_helper.h index 1464c0a7083f..5be578e957f2 100644 --- a/include/LightGBM/R_object_helper.h +++ b/include/LightGBM/R_object_helper.h @@ -15,7 +15,6 @@ #include -#define TYPE_BITS 5 // use .Internal(internalsID()) to uuid #define R_INTERNALS_UUID "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327" From 7d5bfdaf2bdb1c69e162f00406f894ba09b73950 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 15 Apr 2020 04:39:27 +0100 
Subject: [PATCH 04/20] [R-package] Updated package metadata in DESCRIPTION (#2993) --- R-package/DESCRIPTION | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6720c418389b..55df4a779eb3 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -4,11 +4,11 @@ Title: Light Gradient Boosting Machine Version: 2.3.2 Date: 2019-11-26 Authors@R: c( - person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")), - person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("ctb")), - person("Yachen", "Yan", role = c("ctb")), - person("James", "Lamb", email="jaylamb20@gmail.com", role = c("ctb")) - ) + person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")), + person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("ctb")), + person("Yachen", "Yan", role = c("ctb")), + person("James", "Lamb", email="jaylamb20@gmail.com", role = c("ctb")) + ) Description: Tree based algorithms can be improved by introducing boosting frameworks. LightGBM is one such framework, and this package offers an R interface to work with it. It is designed to be distributed and efficient with the following advantages: 1. Faster training speed and higher efficiency. @@ -16,12 +16,14 @@ Description: Tree based algorithms can be improved by introducing boosting frame 3. Better accuracy. 4. Parallel learning supported. 5. Capable of handling large-scale data. - In recognition of these advantages, LightGBM has being widely-used in many winning solutions of machine learning competitions. + In recognition of these advantages, LightGBM has been widely-used in many winning solutions of machine learning competitions. Comparison experiments on public datasets suggest that LightGBM can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. In addition, parallel experiments suggest that in certain circumstances, LightGBM can achieve a linear speed-up in training time by using multiple machines. 
Encoding: UTF-8 License: MIT + file LICENSE URL: https://github.com/Microsoft/LightGBM BugReports: https://github.com/Microsoft/LightGBM/issues +NeedsCompilation: yes +Biarch: false Suggests: ggplot2 (>= 1.0.1), knitr, @@ -37,4 +39,6 @@ Imports: Matrix (>= 1.1-0), methods, utils +SystemRequirements: + C++11 RoxygenNote: 7.0.2 From 9843506e63c546b989df16e6235261fd38ce247d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 16 Apr 2020 03:52:45 +0100 Subject: [PATCH 05/20] [ci] Changed use of strcpy to snprintf (fixes #1990) (#2973) * [ci] Changed use of strcpy to snprintf * fix * fully enable cpplint --- .ci/test.sh | 2 +- include/LightGBM/c_api.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index 2b8b48daa6c0..3cb25ee10d14 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -56,7 +56,7 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then echo "Linting R code" Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit -1 echo "Linting C++ code" - cpplint --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length --recursive ./src ./include || exit 0 + cpplint --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length --recursive ./src ./include || exit -1 exit 0 fi diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 8e9a6fc2500c..6a30fce495c5 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -16,6 +16,7 @@ #include #include +#include #include @@ -1074,7 +1075,8 @@ static char* LastErrorMsg() { static THREAD_LOCAL char err_msg[512] = "Everythin * \param msg Error message */ inline void LGBM_SetLastError(const char* msg) { - std::strcpy(LastErrorMsg(), msg); + const int err_buf_len = 512; + snprintf(LastErrorMsg(), err_buf_len, "%s", msg); } #endif // LIGHTGBM_C_API_H_ From 995a5974c988049e302fa9c5a26f72731c1b2a16 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 16 Apr 2020 17:28:28 +0300 Subject: [PATCH 06/20] [docs] updated core team list (#3001) --- docs/FAQ.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 8816a4430f97..898edb0f8b66 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -22,6 +22,7 @@ You may also ping a member of the core team according to the relevant area of ex - `@guolinke `__ **Guolin Ke** (C++ code / R-package / Python-package) - `@chivee `__ **Qiwei Ye** (C++ code / Python-package) +- `@btrotta `__ **Belinda Trotta** (C++ code) - `@Laurae2 `__ **Damien Soukhavong** (R-package) - `@jameslamb `__ **James Lamb** (R-package) - `@wxchan `__ **Wenxuan Chen** (Python-package) From 0d3e2046533acdda62f15936fb8636f7c8bc27d6 Mon Sep 17 00:00:00 2001 From: Ilya Matiach Date: Sun, 19 Apr 2020 09:34:43 -0400 Subject: [PATCH 07/20] [SWIG][mmlspark] allow allocating more than int max array (#2859) --- swig/lightgbmlib.i | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/swig/lightgbmlib.i b/swig/lightgbmlib.i index 5fba17e12adf..985dfb481f2a 100644 --- a/swig/lightgbmlib.i +++ b/swig/lightgbmlib.i @@ -233,11 +233,6 @@ %pointer_cast(int32_t *, void *, int32_t_to_voidp_ptr) %pointer_cast(int64_t *, void *, int64_t_to_voidp_ptr) -%array_functions(double, doubleArray) -%array_functions(float, floatArray) -%array_functions(int, intArray) -%array_functions(long, longArray) - /* Custom pointer manipulation template */ %define %pointer_manipulation(TYPE, NAME) %{ @@ -278,6 +273,36 @@ TYPE *NAME##_handle(); %enddef +%define 
%long_array_functions(TYPE,NAME) +%{ + static TYPE *new_##NAME(int64_t nelements) { %} + %{ return new TYPE[nelements](); %} + %{} + + static void delete_##NAME(TYPE *ary) { %} + %{ delete [] ary; %} + %{} + + static TYPE NAME##_getitem(TYPE *ary, int64_t index) { + return ary[index]; + } + static void NAME##_setitem(TYPE *ary, int64_t index, TYPE value) { + ary[index] = value; + } + %} + +TYPE *new_##NAME(int64_t nelements); +void delete_##NAME(TYPE *ary); +TYPE NAME##_getitem(TYPE *ary, int64_t index); +void NAME##_setitem(TYPE *ary, int64_t index, TYPE value); + +%enddef + +%long_array_functions(double, doubleArray) +%long_array_functions(float, floatArray) +%long_array_functions(int, intArray) +%long_array_functions(long, longArray) + %pointer_manipulation(void*, voidpp) /* Allow dereferencing of void** to void* */ From 9478e52f9fd751353d4e0e04899c2b090f1ff0fe Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 21 Apr 2020 19:47:23 +0100 Subject: [PATCH 08/20] [R-package] fix R examples and lgb.plot.interpretation() (#3002) * [R-package] fix R examples and lgb.plot.interpretation * remove space in gitignore * try data.table from conda-forge * update FAQ Co-authored-by: Nikita Titov --- .gitignore | 8 ++++++++ R-package/DESCRIPTION | 2 +- R-package/R/lgb.Booster.R | 2 +- R-package/R/lgb.Dataset.R | 4 ++-- R-package/R/lgb.plot.interpretation.R | 4 ++-- R-package/R/lgb.prepare_rules.R | 10 +++++++--- R-package/R/saveRDS.lgb.Booster.R | 2 +- R-package/man/agaricus.test.Rd | 6 ++++-- R-package/man/agaricus.train.Rd | 6 ++++-- R-package/man/bank.Rd | 4 +++- R-package/man/lgb.Dataset.set.categorical.Rd | 4 ++-- R-package/man/lgb.plot.interpretation.Rd | 2 +- R-package/man/lgb.prepare_rules.Rd | 10 +++++++--- R-package/man/lgb.save.Rd | 2 +- R-package/man/saveRDS.lgb.Booster.Rd | 2 +- docs/FAQ.rst | 2 +- docs/conf.py | 2 +- 17 files changed, 47 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 283349aee0fa..5a06037ed68f 100644 --- a/.gitignore +++ b/.gitignore @@ -406,6 +406,14 @@ lightgbm_r/* lightgbm*.tar.gz lightgbm.Rcheck/ +# Files created by R examples and tests +**/lgb-Dataset.data +**/lgb-model.rds +**/lgb.Dataset.data +**/model.rds +**/model.txt +**/lgb-model.txt + # Files from interactive R sessions .Rproj.user **/.Rhistory diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 55df4a779eb3..1924b6a1d666 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -41,4 +41,4 @@ Imports: utils SystemRequirements: C++11 -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 3e1bd0de0b78..d912cd871a57 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -845,7 +845,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' , learning_rate = 1.0 #' , early_stopping_rounds = 5L #' ) -#' lgb.save(model, "model.txt") +#' lgb.save(model, "lgb-model.txt") #' } #' @export lgb.save <- function(booster, filename, num_iteration = NULL) { diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 22afc2666718..fed95913d2d6 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -1073,8 +1073,8 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) 
{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) -#' lgb.Dataset.save(dtrain, "lgb.Dataset.data") -#' dtrain <- lgb.Dataset("lgb.Dataset.data") +#' lgb.Dataset.save(dtrain, "lgb-Dataset.data") +#' dtrain <- lgb.Dataset("lgb-Dataset.data") #' lgb.Dataset.set.categorical(dtrain, 1L:2L) #' #' @rdname lgb.Dataset.set.categorical diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 0efd71ccc929..486b80dd46dd 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -49,7 +49,7 @@ #' ) #' lgb.plot.interpretation( #' tree_interpretation_dt = tree_interpretation[[1L]] -#' , top_n = 5L +#' , top_n = 3L #' ) #' } #' @importFrom data.table setnames @@ -141,7 +141,7 @@ multiple.tree.plot.interpretation <- function(tree_interpretation, } # create plot - tree_interpretation[Contribution > 0.0, bar_color := "firebrick"] + tree_interpretation[abs(Contribution) > 0.0, bar_color := "firebrick"] tree_interpretation[Contribution == 0.0, bar_color := "steelblue"] tree_interpretation[.N:1L, graphics::barplot( diff --git a/R-package/R/lgb.prepare_rules.R b/R-package/R/lgb.prepare_rules.R index 307a69e32a38..3eda16672ce7 100644 --- a/R-package/R/lgb.prepare_rules.R +++ b/R-package/R/lgb.prepare_rules.R @@ -36,9 +36,13 @@ #' data(iris) # Erase iris dataset #' #' # We remapped values differently -#' personal_rules <- list(Species = c("setosa" = 3L, -#' "versicolor" = 2L, -#' "virginica" = 1L)) +#' personal_rules <- list( +#' Species = c( +#' "setosa" = 3L +#' , "versicolor" = 2L +#' , "virginica" = 1L +#' ) +#' ) #' newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) #' str(newest_iris$data) # SUCCESS! 
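#'
#' # A minimal sketch of reusing the returned rules: passing the same `rules`
#' # list back to lgb.prepare_rules() keeps the factor-to-integer mapping
#' # consistent across datasets. (`re_encoded` is an illustrative name only,
#' # not an object defined by the package.)
#' re_encoded <- lgb.prepare_rules(data = iris, rules = newest_iris$rules)
#' str(re_encoded$data)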
#' diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 855e1e1b6c8e..185186c9bc25 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -37,7 +37,7 @@ #' , learning_rate = 1.0 #' , early_stopping_rounds = 5L #' ) -#' saveRDS.lgb.Booster(model, "model.rds") +#' saveRDS.lgb.Booster(model, "lgb-model.rds") #' } #' @export saveRDS.lgb.Booster <- function(object, diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd index c713cbde6a9a..aad4627eedae 100644 --- a/R-package/man/agaricus.test.Rd +++ b/R-package/man/agaricus.test.Rd @@ -4,8 +4,10 @@ \name{agaricus.test} \alias{agaricus.test} \title{Test part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 1611 -rows and 126 variables} +\format{ +A list containing a label vector, and a dgCMatrix object with 1611 +rows and 126 variables +} \usage{ data(agaricus.test) } diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd index 9465a6efea29..62b7d05a0bc3 100644 --- a/R-package/man/agaricus.train.Rd +++ b/R-package/man/agaricus.train.Rd @@ -4,8 +4,10 @@ \name{agaricus.train} \alias{agaricus.train} \title{Training part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 6513 -rows and 127 variables} +\format{ +A list containing a label vector, and a dgCMatrix object with 6513 +rows and 127 variables +} \usage{ data(agaricus.train) } diff --git a/R-package/man/bank.Rd b/R-package/man/bank.Rd index fd1382eb87d8..3b4c13b24d54 100644 --- a/R-package/man/bank.Rd +++ b/R-package/man/bank.Rd @@ -4,7 +4,9 @@ \name{bank} \alias{bank} \title{Bank Marketing Data Set} -\format{A data.table with 4521 rows and 17 variables} +\format{ +A data.table with 4521 rows and 17 variables +} \usage{ data(bank) } diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index 0ab44b56bb0d..097eea02b465 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -24,8 +24,8 @@ Set the categorical features of an \code{lgb.Dataset} object. 
Use this function data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) -lgb.Dataset.save(dtrain, "lgb.Dataset.data") -dtrain <- lgb.Dataset("lgb.Dataset.data") +lgb.Dataset.save(dtrain, "lgb-Dataset.data") +dtrain <- lgb.Dataset("lgb-Dataset.data") lgb.Dataset.set.categorical(dtrain, 1L:2L) } diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index b8818eecdbc0..f8266308552d 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -68,7 +68,7 @@ tree_interpretation <- lgb.interprete( ) lgb.plot.interpretation( tree_interpretation_dt = tree_interpretation[[1L]] - , top_n = 5L + , top_n = 3L ) } } diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index 69821ef7d240..cc34bf11ca5c 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -48,9 +48,13 @@ all.equal(new_iris$data, newer_iris$data) data(iris) # Erase iris dataset # We remapped values differently -personal_rules <- list(Species = c("setosa" = 3L, - "versicolor" = 2L, - "virginica" = 1L)) +personal_rules <- list( + Species = c( + "setosa" = 3L + , "versicolor" = 2L + , "virginica" = 1L + ) +) newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) str(newest_iris$data) # SUCCESS! diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index 91e2befb738f..f1ffd48355ee 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -39,6 +39,6 @@ model <- lgb.train( , learning_rate = 1.0 , early_stopping_rounds = 5L ) -lgb.save(model, "model.txt") +lgb.save(model, "lgb-model.txt") } } diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index 3e7bb368d878..66afa861db9f 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -61,6 +61,6 @@ model <- lgb.train( , learning_rate = 1.0 , early_stopping_rounds = 5L ) -saveRDS.lgb.Booster(model, "model.rds") +saveRDS.lgb.Booster(model, "lgb-model.rds") } } diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 898edb0f8b66..71d9ad38bd19 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -211,7 +211,7 @@ This is a known bug: `Microsoft/LightGBM#539 `_. If you use ``lgb.dl()`` to build from source (i.e. not using pre-compiled dll), you need to upgrade your version of ``data.table`` to at least version 1.12.0. +If you are experiencing this error when running ``lightgbm``, you may be facing the same issue reported in `#2715 `_ and later in `#2989 `_. We have seen that some in some situations, using ``data.table`` 1.11.x results in this error. To get around this, you can upgrade your version of ``data.table`` to at least version 1.12.0. 
------ diff --git a/docs/conf.py b/docs/conf.py index c157566d3805..cf499354ee54 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -227,13 +227,13 @@ def generate_r_docs(app): /home/docs/.conda/bin/conda create -q -y -n r_env \ r-base=3.5.1=h1e0a451_2 \ r-devtools=1.13.6=r351h6115d3f_0 \ - r-data.table=1.11.4=r351h96ca727_0 \ r-jsonlite=1.5=r351h96ca727_0 \ r-matrix=1.2_14=r351h96ca727_0 \ r-testthat=2.0.0=r351h29659fb_0 \ cmake=3.14.0=h52cb24c_0 \ ca-certificates=2019.11.27=0 /home/docs/.conda/bin/conda install -q -y -n r_env -c conda-forge \ + r-data.table=1.12.8=r35hcdcec82_0 \ r-pkgdown=1.3.0=r35h6115d3f_1001 \ r-roxygen2=6.1.1=r35h0357c0b_1001 source /home/docs/.conda/bin/activate r_env From 30607d80fbf389fb504cf0428f48394ede1fc7fd Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 22 Apr 2020 21:47:52 +0300 Subject: [PATCH 09/20] be compatible with Sphinx 3 (#3013) --- docs/conf.py | 1 + docs/requirements.txt | 2 +- docs/requirements_base.txt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cf499354ee54..6bbf7d52f498 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -139,6 +139,7 @@ def run(self): "h": "c", } breathe_show_define_initializer = True + c_id_attributes = ['LIGHTGBM_C_EXPORT'] # -- Options for HTML output ---------------------------------------------- diff --git a/docs/requirements.txt b/docs/requirements.txt index 2fb1ed05cb53..17896e0c7283 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ -r requirements_base.txt -breathe < 4.15 +breathe diff --git a/docs/requirements_base.txt b/docs/requirements_base.txt index 23bd0e5b8c86..9c3dfc2a5b90 100644 --- a/docs/requirements_base.txt +++ b/docs/requirements_base.txt @@ -1,3 +1,3 @@ -sphinx < 3.0 +sphinx sphinx_rtd_theme >= 0.3 mock; python_version < '3' From b013af08fac226ab116bf1001c9b066ca22c4bb4 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 23 Apr 2020 05:49:19 +0300 Subject: [PATCH 10/20] [ci][docs] Fix RTD builds (#3015) * hotfix for pip error * Update conf.py * Update conf.py * Update conf.py * fix rtd builds --- .readthedocs.yml | 2 +- docs/requirements_rtd.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 docs/requirements_rtd.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index c2e19847a4fc..fe005bc307a8 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,7 +4,7 @@ formats: python: version: 3 install: - - requirements: docs/requirements.txt + - requirements: docs/requirements_rtd.txt sphinx: builder: html configuration: docs/conf.py diff --git a/docs/requirements_rtd.txt b/docs/requirements_rtd.txt new file mode 100644 index 000000000000..91ce2a1b1e6b --- /dev/null +++ b/docs/requirements_rtd.txt @@ -0,0 +1,4 @@ +sphinx >= 3.0.2 +sphinx_rtd_theme >= 0.3 +mock; python_version < '3' +breathe From 02a0089fc1c627e9f98776ab64c2aaac5209a56e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 22 Apr 2020 22:35:56 -0500 Subject: [PATCH 11/20] [ci] added echoing of R CMD check logs to CI (#3009) --- .ci/test_r_package.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 4dddd36225f4..10891383411b 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -86,9 +86,17 @@ export _R_CHECK_FORCE_SUGGESTS_=0 # fails tests if either ERRORs or WARNINGs are thrown by # R CMD CHECK +check_succeeded="yes" R CMD check ${PKG_TARBALL} \ --as-cran \ -|| exit -1 +|| check_succeeded="no" + +echo "R CMD check build logs:" +cat 
${BUILD_DIRECTORY}/lightgbm.Rcheck/00install.out + +if [[ $check_succeeded == "no" ]]; then + exit -1 +fi if grep -q -R "WARNING" "$LOG_FILE_NAME"; then echo "WARNINGS have been found by R CMD check!" From 7b3350176ddffbe7d8c4b8a0ba5132fa44e93533 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 24 Apr 2020 06:13:28 +0300 Subject: [PATCH 12/20] minor CI scripts update (#3019) --- .ci/setup.sh | 4 ++-- .ci/test_r_package.sh | 7 ------- .travis.yml | 2 +- docker/dockerfile-python | 2 +- docker/gpu/dockerfile.gpu | 2 +- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/.ci/setup.sh b/.ci/setup.sh index 39370fa27604..0c4101189346 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -17,7 +17,7 @@ if [[ $OS_NAME == "macos" ]]; then if [[ $AZURE == "true" ]] && [[ $TASK == "sdist" ]]; then brew install https://raw.githubusercontent.com/Homebrew/homebrew-core/f3544543a3115023fc7ca962c21d14b443f419d0/Formula/swig.rb # swig 3.0.12 fi - wget -q -O conda.sh https://repo.continuum.io/miniconda/Miniconda${PYTHON_VERSION:0:1}-latest-MacOSX-x86_64.sh + wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh else # Linux if [[ $TASK == "mpi" ]]; then sudo apt-get update @@ -37,7 +37,7 @@ else # Linux echo libamdocl64.so > $OPENCL_VENDOR_PATH/amdocl64.icd fi if [[ $TRAVIS == "true" ]]; then - wget -q -O conda.sh https://repo.continuum.io/miniconda/Miniconda${PYTHON_VERSION:0:1}-latest-Linux-x86_64.sh + wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh fi fi diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 10891383411b..12e3b4e5724b 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -34,11 +34,6 @@ fi # Installing R precompiled for Mac OS 10.11 or higher if [[ $OS_NAME == "macos" ]]; then - - # temp fix for basictex - if [[ $AZURE == "true" ]]; then - brew update - fi brew install qpdf brew cask install basictex export PATH="/Library/TeX/texbin:$PATH" @@ -113,5 +108,3 @@ if [[ ${NUM_CHECK_NOTES} -gt ${ALLOWED_CHECK_NOTES} ]]; then echo "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. 
Only ${ALLOWED_CHECK_NOTES} are allowed" exit -1 fi - -exit 0 diff --git a/.travis.yml b/.travis.yml index acba8bfc3ad0..3f13504929c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ os: - linux - osx dist: bionic -osx_image: xcode11.3 +osx_image: xcode11.4 env: global: # default values diff --git a/docker/dockerfile-python b/docker/dockerfile-python index b157b41117ba..29fc4ece5b41 100644 --- a/docker/dockerfile-python +++ b/docker/dockerfile-python @@ -13,7 +13,7 @@ RUN apt-get update && \ git \ wget && \ # python environment - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ /bin/bash Miniconda3-latest-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ export PATH="$CONDA_DIR/bin:$PATH" && \ conda config --set always_yes yes --set changeps1 no && \ diff --git a/docker/gpu/dockerfile.gpu b/docker/gpu/dockerfile.gpu index c4801d6e462f..08c243a57bce 100644 --- a/docker/gpu/dockerfile.gpu +++ b/docker/gpu/dockerfile.gpu @@ -70,7 +70,7 @@ ENV PATH $CONDA_DIR/bin:$PATH # Install miniconda RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \ - wget --quiet https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p $CONDA_DIR && \ rm ~/miniconda.sh From 4667d5032f81f8cc6aeb8c1c4620424b1beacf36 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 24 Apr 2020 18:31:16 +0300 Subject: [PATCH 13/20] update link to Julia wrapper (#3021) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 290590d80e9e..a6bc3916b5c5 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ External (Unofficial) Repositories Optuna (hyperparameter optimization framework): https://github.com/optuna/optuna -Julia-package: https://github.com/Allardvm/LightGBM.jl +Julia-package: https://github.com/IQVIA-ML/LightGBM.jl JPMML (Java PMML converter): https://github.com/jpmml/jpmml-lightgbm From 22d6d1fd5a1d05444add1efb8f3480f6932a523f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 24 Apr 2020 23:45:45 -0500 Subject: [PATCH 14/20] [R-package] Added tests on creating a Booster from a Dataset (#3007) --- R-package/tests/testthat/test_lgb.Booster.R | 52 +++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 8ccb357626ce..fbc84bc4ed46 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -227,3 +227,55 @@ test_that("If a string and a file are both passed to lgb.load() the file is used pred2 <- predict(bst2, test$data) expect_identical(pred, pred2) }) + +context("Booster") + +test_that("Creating a Booster from a Dataset should work", { + set.seed(708L) + data(agaricus.train, package = "lightgbm") + data(agaricus.test, package = "lightgbm") + dtrain <- lgb.Dataset( + agaricus.train$data + , label = agaricus.train$label + ) + bst <- Booster$new( + params = list( + objective = "binary" + ), + train_set = dtrain + ) + expect_true(lgb.is.Booster(bst)) + expect_equal(bst$current_iter(), 0L) + expect_true(is.na(bst$best_score)) + expect_true(all(bst$predict(agaricus.train$data) == 0.5)) +}) + +test_that("Creating a Booster from a Dataset with an existing predictor should work", { + set.seed(708L) + 
data(agaricus.train, package = "lightgbm") + nrounds <- 2L + bst <- lightgbm( + data = as.matrix(agaricus.train$data) + , label = agaricus.train$label + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = nrounds + , objective = "binary" + ) + data(agaricus.test, package = "lightgbm") + dtest <- Dataset$new( + data = agaricus.test$data + , label = agaricus.test$label + , predictor = bst$to_predictor() + ) + bst_from_ds <- Booster$new( + train_set = dtest + ) + expect_true(lgb.is.Booster(bst)) + expect_equal(bst$current_iter(), nrounds) + expect_equal(bst$eval_train()[[1L]][["value"]], 0.1115352) + expect_equal(bst_from_ds$current_iter(), nrounds) + dumped_model <- jsonlite::fromJSON(bst$dump_model()) + expect_identical(bst_from_ds$eval_train(), list()) + expect_equal(bst_from_ds$current_iter(), nrounds) +}) From 5304885c0aa4f26e29ae2a4d451c0fb39d70f24b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 24 Apr 2020 23:47:51 -0500 Subject: [PATCH 15/20] [R-package] Added tests on LGBM_GetLastError_R (#3005) --- R-package/R/utils.R | 2 ++ R-package/tests/testthat/test_utils.R | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 9b036f91db8d..1e0e759d653b 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -19,6 +19,8 @@ lgb.encode.char <- function(arr, len) { } +# [description] Raise an error. Before raising that error, check for any error message +# stored in a buffer on the C++ side. lgb.last_error <- function() { # Perform text error buffering buf_len <- 200L diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index a16e6f742061..9765356a0df8 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -48,3 +48,23 @@ test_that("lgb.params2str() works as expected for a key in params with multiple , "objective=magic metric=a,ab,abc,abcdefg nrounds=10 learning_rate=0.0000001" ) }) + +context("lgb.last_error") + +test_that("lgb.last_error() throws an error if there are no errors", { + expect_error({ + lgb.last_error() + }, regexp = "Everything is fine") +}) + +test_that("lgb.last_error() correctly returns errors from the C++ side", { + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + dvalid1 <- lgb.Dataset( + data = train$data + , label = as.matrix(rnorm(5L)) + ) + expect_error({ + dvalid1$construct() + }, regexp = "[LightGBM] [Fatal] Length of label is not same with #data", fixed = TRUE) +}) From b05520e7ecfcb3a20857346238420b4cf7040d2e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 25 Apr 2020 00:17:17 -0500 Subject: [PATCH 16/20] [R-package] [docs] remove unnecessary build in test coverage (#3004) --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index 6e4a6eb33050..c9474cf38107 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -147,7 +147,7 @@ The example below shows how to generate code coverage for the R package on a mac # Install export CXX=/usr/local/bin/g++-8 export CC=/usr/local/bin/gcc-8 -Rscript build_r.R +Rscript build_r.R --skip-install # Get coverage Rscript -e " \ From 2b9175aae498d115626ad2d944520033c8c18c6c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 25 Apr 2020 00:42:54 -0500 Subject: [PATCH 17/20] [R-package] added tests on LGBM_BoosterRollbackOneIter_R (#3006) --- R-package/tests/testthat/test_lgb.Booster.R | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git 
a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index fbc84bc4ed46..6b1cc20f957a 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -279,3 +279,35 @@ test_that("Creating a Booster from a Dataset with an existing predictor should w expect_identical(bst_from_ds$eval_train(), list()) expect_equal(bst_from_ds$current_iter(), nrounds) }) + +test_that("Booster$rollback_one_iter() should work as expected", { + set.seed(708L) + data(agaricus.train, package = "lightgbm") + data(agaricus.test, package = "lightgbm") + train <- agaricus.train + test <- agaricus.test + nrounds <- 5L + bst <- lightgbm( + data = as.matrix(train$data) + , label = train$label + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = nrounds + , objective = "binary" + ) + expect_equal(bst$current_iter(), nrounds) + expect_true(lgb.is.Booster(bst)) + logloss <- bst$eval_train()[[1L]][["value"]] + expect_equal(logloss, 0.01904786) + + x <- bst$rollback_one_iter() + + # rollback_one_iter() should return a booster and modify the original + # booster in place + expect_true(lgb.is.Booster(x)) + expect_equal(bst$current_iter(), nrounds - 1L) + + # score should now come from the model as of 4 iterations + logloss <- bst$eval_train()[[1L]][["value"]] + expect_equal(logloss, 0.027915146) +}) From 1f3e72c43ca8485eeba988738ecb0e977c7977f1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 25 Apr 2020 11:35:54 -0500 Subject: [PATCH 18/20] [R-package] fixed best_iter and best_score when training data is passed (fixes #2295, #2525) (#2961) * [R-package] fixed best_iter and best_score when early_stopping is not used (fixes #2295) * fixed tests --- R-package/R/callback.R | 20 +- R-package/R/lgb.Booster.R | 6 +- R-package/R/lgb.cv.R | 20 +- R-package/R/lgb.train.R | 25 ++- R-package/tests/testthat/test_basic.R | 294 ++++++++++++++++++++++++++ 5 files changed, 343 insertions(+), 22 deletions(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 3c8bb243783b..d5495c9d56cd 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -1,3 +1,11 @@ +# constants that control naming in lists +.EVAL_KEY <- function() { + return("eval") +} +.EVAL_ERR_KEY <- function() { + return("eval_err") +} + #' @importFrom R6 R6Class CB_ENV <- R6::R6Class( "lgb.cb_env", @@ -216,8 +224,8 @@ cb.record.evaluation <- function() { # Create dummy lists env$model$record_evals[[data_name]][[name]] <- list() - env$model$record_evals[[data_name]][[name]]$eval <- list() - env$model$record_evals[[data_name]][[name]]$eval_err <- list() + env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- list() + env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- list() } @@ -238,12 +246,12 @@ cb.record.evaluation <- function() { name <- eval_res$name # Store evaluation data - env$model$record_evals[[data_name]][[name]]$eval <- c( - env$model$record_evals[[data_name]][[name]]$eval + env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- c( + env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] , eval_res$value ) - env$model$record_evals[[data_name]][[name]]$eval_err <- c( - env$model$record_evals[[data_name]][[name]]$eval_err + env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- c( + env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] , eval_err ) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index d912cd871a57..c9eb81f4d3c7 100644 --- a/R-package/R/lgb.Booster.R +++ 
b/R-package/R/lgb.Booster.R
@@ -5,7 +5,7 @@ Booster <- R6::R6Class(
   public = list(
 
     best_iter = -1L,
-    best_score = NA,
+    best_score = NA_real_,
     record_evals = list(),
 
     # Finalize will free up the handles
@@ -989,11 +989,11 @@ lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_
   }
 
   # Create result
-  result <- booster$record_evals[[data_name]][[eval_name]]$eval
+  result <- booster$record_evals[[data_name]][[eval_name]][[.EVAL_KEY()]]
 
   # Check if error is requested
   if (is_err) {
-    result <- booster$record_evals[[data_name]][[eval_name]]$eval_err
+    result <- booster$record_evals[[data_name]][[eval_name]][[.EVAL_ERR_KEY()]]
   }
 
   # Check if iteration is non existant
diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 90e54773c786..6fa0a30c606f 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -370,14 +370,22 @@ lgb.cv <- function(params = list()
 
   }
 
+  # When early stopping is not activated, we compute the best iteration / score ourselves
+  # based on the first metric
   if (record && is.na(env$best_score)) {
-    if (env$eval_list[[1L]]$higher_better[1L] == TRUE) {
-      cv_booster$best_iter <- unname(which.max(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]])))
-      cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]]
-    } else {
-      cv_booster$best_iter <- unname(which.min(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]])))
-      cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]]
+    first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L]
+    .find_best <- which.min
+    if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
+      .find_best <- which.max
     }
+    cv_booster$best_iter <- unname(
+      .find_best(
+        unlist(
+          cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]]
+        )
+      )
+    )
+    cv_booster$best_score <- cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]][[cv_booster$best_iter]]
   }
 
   if (reset_data) {
diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index c41f32e15c8e..e6cea8076b9f 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -237,6 +237,7 @@ lgb.train <- function(params = list(),
   if (valid_contain_train) {
     booster$set_train_data_name(train_data_name)
   }
+
   for (key in names(reduced_valid_sets)) {
     booster$add_valid(reduced_valid_sets[[key]], key)
   }
@@ -290,16 +291,26 @@ lgb.train <- function(params = list(),
 
   }
 
+  # check if any valids were given other than the training data
+  non_train_valid_names <- names(valids)[!(names(valids) == train_data_name)]
+  first_valid_name <- non_train_valid_names[1L]
+
   # When early stopping is not activated, we compute the best iteration / score ourselves by
   # selecting the first metric and the first dataset
-  if (record && length(valids) > 0L && is.na(env$best_score)) {
-    if (env$eval_list[[1L]]$higher_better[1L] == TRUE) {
-      booster$best_iter <- unname(which.max(unlist(booster$record_evals[[2L]][[1L]][[1L]])))
-      booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]]
-    } else {
-      booster$best_iter <- unname(which.min(unlist(booster$record_evals[[2L]][[1L]][[1L]])))
-      booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]]
+  if (record && length(non_train_valid_names) > 0L && is.na(env$best_score)) {
+    first_metric <- booster$.__enclos_env__$private$eval_names[1L]
+    .find_best <- which.min
+    if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
+      .find_best <- which.max
     }
+    booster$best_iter <- unname(
+      .find_best(
+        unlist(
+          booster$record_evals[[first_valid_name]][[first_metric]][[.EVAL_KEY()]]
+        )
+      )
+    )
+    booster$best_score <- booster$record_evals[[first_valid_name]][[first_metric]][[.EVAL_KEY()]][[booster$best_iter]]
   }
 
   # Check for booster model conversion to predictor model
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index e3f1030d7755..3455c17cad31 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -250,6 +250,37 @@ test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset
   }
 })
+test_that("lgb.cv() gives the correct best_score and best_iter for a metric where higher values are better", {
+  set.seed(708L)
+  dtrain <- lgb.Dataset(
+    data = as.matrix(runif(n = 500L, min = 0.0, max = 15.0), drop = FALSE)
+    , label = rep(c(0L, 1L), 250L)
+  )
+  nrounds <- 10L
+  cv_bst <- lgb.cv(
+    data = dtrain
+    , nfold = 5L
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , params = list(
+      objective = "binary"
+      , metric = "auc,binary_error"
+      , learning_rate = 1.5
+    )
+  )
+  expect_is(cv_bst, "lgb.CVBooster")
+  expect_named(
+    cv_bst$record_evals
+    , c("start_iter", "valid")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  auc_scores <- unlist(cv_bst$record_evals[["valid"]][["auc"]][["eval"]])
+  expect_length(auc_scores, nrounds)
+  expect_identical(cv_bst$best_iter, which.max(auc_scores))
+  expect_identical(cv_bst$best_score, auc_scores[which.max(auc_scores)])
+})
+
 
 context("lgb.train()")
 
 test_that("lgb.train() works as expected with multiple eval metrics", {
@@ -595,3 +626,266 @@ test_that("lgb.train() supports non-ASCII feature names", {
     , feature_names
   )
 })
+
+test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", {
+  set.seed(708L)
+  trainDF <- data.frame(
+    "feat1" = rep(c(10.0, 100.0), 500L)
+    , "target" = rep(c(-50.0, 50.0), 500L)
+  )
+  validDF <- data.frame(
+    "feat1" = rep(50.0, 4L)
+    , "target" = rep(50.0, 4L)
+  )
+  dtrain <- lgb.Dataset(
+    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
+    , label = trainDF[["target"]]
+  )
+  dvalid1 <- lgb.Dataset(
+    data = as.matrix(validDF[["feat1"]], drop = FALSE)
+    , label = validDF[["target"]]
+  )
+  dvalid2 <- lgb.Dataset(
+    data = as.matrix(validDF[3L:4L, "feat1"], drop = FALSE)
+    , label = validDF[3L:4L, "target"]
+  )
+  nrounds <- 10L
+  train_params <- list(
+    objective = "regression"
+    , metric = "rmse"
+    , learning_rate = 1.5
+  )
+
+  # example 1: two valids, neither are the training data
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "valid2" = dvalid2
+    )
+    , params = train_params
+  )
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "valid1", "valid2")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
+  expect_length(rmse_scores, nrounds)
+  expect_identical(bst$best_iter, which.min(rmse_scores))
+  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
+
+  # example 2: train first (called "train") and two valids
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "train" = dtrain
+      , "valid1" = dvalid1
+      , "valid2" = dvalid2
+    )
+    , params = train_params
+  )
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "train", "valid1", "valid2")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
+  expect_length(rmse_scores, nrounds)
+  expect_identical(bst$best_iter, which.min(rmse_scores))
+  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
+
+  # example 3: train second (called "train") and two valids
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "train" = dtrain
+      , "valid2" = dvalid2
+    )
+    , params = train_params
+  )
+  # note that "train" still ends up as the first one
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "train", "valid1", "valid2")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
+  expect_length(rmse_scores, nrounds)
+  expect_identical(bst$best_iter, which.min(rmse_scores))
+  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
+
+  # example 4: train third (called "train") and two valids
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "valid2" = dvalid2
+      , "train" = dtrain
+    )
+    , params = train_params
+  )
+  # note that "train" still ends up as the first one
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "train", "valid1", "valid2")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
+  expect_length(rmse_scores, nrounds)
+  expect_identical(bst$best_iter, which.min(rmse_scores))
+  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
+
+  # example 5: train second (called "something-random-we-would-not-hardcode") and two valids
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "something-random-we-would-not-hardcode" = dtrain
+      , "valid2" = dvalid2
+    )
+    , params = train_params
+  )
+  # note that "something-random-we-would-not-hardcode" was recognized as the training
+  # data even though it isn't named "train"
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
+  expect_length(rmse_scores, nrounds)
+  expect_identical(bst$best_iter, which.min(rmse_scores))
+  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
+
+  # example 6: the only valid supplied is the training data
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "train" = dtrain
+    )
+    , params = train_params
+  )
+  expect_identical(bst$best_iter, -1L)
+  expect_identical(bst$best_score, NA_real_)
+})
+
+test_that("lgb.train() gives the correct best_score and best_iter for a metric where higher values are better", {
+  set.seed(708L)
+  trainDF <- data.frame(
+    "feat1" = runif(n = 500L, min = 0.0, max = 15.0)
+    , "target" = rep(c(0L, 1L), 250L)
+  )
+  validDF <- data.frame(
+    "feat1" = runif(n = 50L, min = 0.0, max = 15.0)
+    , "target" = rep(c(0L, 1L), 25L)
+  )
+  dtrain <- lgb.Dataset(
+    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
+    , label = trainDF[["target"]]
+  )
+  dvalid1 <- lgb.Dataset(
+    data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
+    , label = validDF[1L:25L, "target"]
+  )
+  nrounds <- 10L
+  bst <- lgb.train(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "something-random-we-would-not-hardcode" = dtrain
+    )
+    , params = list(
+      objective = "binary"
+      , metric = "auc"
+      , learning_rate = 1.5
+    )
+  )
+  # note that "something-random-we-would-not-hardcode" was recognized as the training
+  # data even though it isn't named "train"
+  expect_named(
+    bst$record_evals
+    , c("start_iter", "something-random-we-would-not-hardcode", "valid1")
+    , ignore.order = FALSE
+    , ignore.case = FALSE
+  )
+  auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]])
+  expect_length(auc_scores, nrounds)
+  expect_identical(bst$best_iter, which.max(auc_scores))
+  expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
+})
+
+test_that("using lightgbm() without early stopping, best_iter and best_score come from valids and not training data", {
+  set.seed(708L)
+  # example: train second (called "something-random-we-would-not-hardcode"), two valids,
+  # and a metric where higher values are better ("auc")
+  trainDF <- data.frame(
+    "feat1" = runif(n = 500L, min = 0.0, max = 15.0)
+    , "target" = rep(c(0L, 1L), 250L)
+  )
+  validDF <- data.frame(
+    "feat1" = runif(n = 50L, min = 0.0, max = 15.0)
+    , "target" = rep(c(0L, 1L), 25L)
+  )
+  dtrain <- lgb.Dataset(
+    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
+    , label = trainDF[["target"]]
+  )
+  dvalid1 <- lgb.Dataset(
+    data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
+    , label = validDF[1L:25L, "target"]
+  )
+  dvalid2 <- lgb.Dataset(
+    data = as.matrix(validDF[26L:50L, "feat1"], drop = FALSE)
+    , label = validDF[26L:50L, "target"]
+  )
+  nrounds <- 10L
+  bst <- lightgbm(
+    data = dtrain
+    , nrounds = nrounds
+    , num_leaves = 5L
+    , valids = list(
+      "valid1" = dvalid1
+      , "something-random-we-would-not-hardcode" = dtrain
+      , "valid2" = dvalid2
+    )
+    , params = list(
+      objective = "binary"
+      , metric = "auc"
+      , learning_rate = 1.5
+    )
+    , verbose = -7L
+  )
+  # when verbose <= 0 is passed to lightgbm(), 'valids' is passed through to lgb.train()
+  # untouched. 
If you set verbose to > 0, the training data will still be first but called "train" + expect_named( + bst$record_evals + , c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2") + , ignore.order = FALSE + , ignore.case = FALSE + ) + auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]]) + expect_length(auc_scores, nrounds) + expect_identical(bst$best_iter, which.max(auc_scores)) + expect_identical(bst$best_score, auc_scores[which.max(auc_scores)]) +}) From eedc1a7f6f3922f93ef0d4725ded7c49961a586c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 25 Apr 2020 11:48:34 -0500 Subject: [PATCH 19/20] [python][tests] unused and missing imports (#3023) --- tests/c_api_test/test_.py | 1 - tests/python_package_test/test_engine.py | 1 - tests/python_package_test/test_sklearn.py | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/c_api_test/test_.py b/tests/c_api_test/test_.py index b138de4a0ef4..20593e5fe210 100644 --- a/tests/c_api_test/test_.py +++ b/tests/c_api_test/test_.py @@ -1,7 +1,6 @@ # coding: utf-8 import ctypes import os -import sys from platform import system diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c5348e9858c4..3be0568e622a 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1857,7 +1857,6 @@ def metrics_combination_cv_regression(metric_list, assumed_iteration, self.assertEqual(len(set([iter_valid1_l1, iter_valid1_l2, iter_valid2_l1, iter_valid2_l2])), 4) iter_min_l1 = min([iter_valid1_l1, iter_valid2_l1]) iter_min_l2 = min([iter_valid1_l2, iter_valid2_l2]) - iter_min = min([iter_min_l1, iter_min_l2]) iter_min_valid1 = min([iter_valid1_l1, iter_valid1_l2]) iter_cv_l1 = 4 diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 1fb44d0dc12c..096d36a31171 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -4,6 +4,7 @@ import math import os import unittest +import warnings import lightgbm as lgb import numpy as np From 2c18a0f3ed732df77d4304ed1d2b46a345d661c8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 26 Apr 2020 16:05:45 -0500 Subject: [PATCH 20/20] [R-package] [ci] Add Windows CI for R package (fixes #2335) (#2936) * first test of appveyor * strings are strings * lil bit of build script * fixing paths * removed unnecessary file * updated CRAN URL * added a lot more printing * fixing paths * more stuff * fixed paths * more stuff * more path guessing * even more paths * more stuff * moar logz * and now for something totally different * please work * ok could be a thing * changing directories * we might be in business * fixed install syntax * tryinv mingw * more mingw * ignore Suggests check * trying Azure DevOps * just run bare minimum for Azure DevOps * fixed build dir thing * trying to set libPaths * more testing * trying R 3.6.3 * R 3.6.3 * this feels right * still messing around with libraries * more paths * removed duplication in Windows testing code * simpler * fixed conda stuff * more conda stuff * more fixes * fixed testing script * moved AppVeyor setup to the top * commenting * ch-ch-ch-ch-chaaaanges * paths * plz work * fixed conda stuff in Windows CI * uncommented stuff to test a full build * fixed quotes and removed some unnecessary stuff * added install.libs.R change * quotes are impoortant * added commented-out stuff back in * added Windows script, download retries, and MSVC linking * minor 
fixes * cleaned up debugging code in FindLibR * cleaned up debugging code and moved R first in CI * fixed vsts-ci indentation * cut documentation stuff out of MSVC build * fix R CMD check for Azure * misc whitespace changes * Added echoing of build logs from R CMD check * cut out more documentation tests * fixed NOTE about imports from Matrix * moved some changes out of this PR and into #2963 * fixed whitespace stuff * added check on number of NOTES * adding better checks * fixing check on NOTEs * removing unnecessary variable * Update .ci/test_r_package_windows.ps1 Co-Authored-By: Nikita Titov * some changes * fix quoting * trying MINGW on Azure DevOps * fixing paths * more paths * fixing paths * testing paths * fixing slashes * pinned CTAN mirror * get better logs * made sure Azure finds MinGW, fixed search for LIBR_CORE_LIBRARY, stopped building R docs on Azure * Apply suggestions from code review Co-Authored-By: Nikita Titov * added CXX, CC for Windows builds and changed back to building docs on all MINGW builds * stored LIBR_CORE_LIBRARY hints in one variable * Apply suggestions from code review Co-Authored-By: Nikita Titov * changes from code review * increased parallel builds for Azure CI * Apply suggestions from code review Co-Authored-By: Nikita Titov Co-authored-by: Nikita Titov --- .appveyor.yml | 2 + .ci/test_r_package_windows.ps1 | 108 +++++++++++++++++++++ .ci/test_windows.ps1 | 5 + .vsts-ci.yml | 9 +- R-package/NAMESPACE | 1 + R-package/R/lightgbm.R | 1 + R-package/src/cmake/modules/FindLibR.cmake | 26 +++-- R-package/src/install.libs.R | 1 + 8 files changed, 144 insertions(+), 9 deletions(-) create mode 100644 .ci/test_r_package_windows.ps1 diff --git a/.appveyor.yml b/.appveyor.yml index f8cd85f17dcb..20807e36e6f5 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -7,6 +7,8 @@ configuration: # a trick to construct a build matrix with multiple Python versi environment: matrix: + - COMPILER: MINGW + TASK: r-package - COMPILER: MSVC TASK: python - COMPILER: MINGW diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 new file mode 100644 index 000000000000..5bd65d28408b --- /dev/null +++ b/.ci/test_r_package_windows.ps1 @@ -0,0 +1,108 @@ +# Download a file and retry upon failure. 
This looks like +# an infinite loop but CI-level timeouts will kill it +function Download-File-With-Retries { + param( + [string]$url, + [string]$destfile + ) + do { + Write-Output "Downloading ${url}" + sleep 5; + (New-Object System.Net.WebClient).DownloadFile($url, $destfile) + } while(!$?); +} + +$env:R_WINDOWS_VERSION = "3.6.3" +$env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/' +$env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH +$env:CRAN_MIRROR = "https://cloud.r-project.org/" +$env:CTAN_MIRROR = "https://ctan.math.illinois.edu/systems/win32/miktex/tm/packages/" + +if ($env:COMPILER -eq "MINGW") { + $env:CXX = "$env:R_LIB_PATH/Rtools/mingw_64/bin/g++.exe" + $env:CC = "$env:R_LIB_PATH/Rtools/mingw_64/bin/gcc.exe" +} + +cd $env:BUILD_SOURCESDIRECTORY +tzutil /s "GMT Standard Time" +[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH) + +if ($env:COMPILER -eq "MINGW") { + Write-Output "Telling R to use MinGW" + $install_libs = "$env:BUILD_SOURCESDIRECTORY/R-package/src/install.libs.R" + ((Get-Content -path $install_libs -Raw) -replace 'use_mingw <- FALSE','use_mingw <- TRUE') | Set-Content -Path $install_libs +} + +# download R and RTools +Write-Output "Downloading R and Rtools" +Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" +Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/Rtools/Rtools35.exe" -destfile "Rtools.exe" + +# Install R +Write-Output "Installing R" +Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64" ; Check-Output $? +Write-Output "Done installing R" + +Write-Output "Installing Rtools" +Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/Rtools" ; Check-Output $? +Write-Output "Done installing Rtools" + +# MiKTeX and pandoc can be skipped on non-MINGW builds, since we don't +# build the package documentation for those +if ($env:COMPILER -eq "MINGW") { + Write-Output "Downloading MiKTeX" + Download-File-With-Retries -url "https://miktex.org/download/win/miktexsetup-x64.zip" -destfile "miktexsetup-x64.zip" + Add-Type -AssemblyName System.IO.Compression.FileSystem + [System.IO.Compression.ZipFile]::ExtractToDirectory("miktexsetup-x64.zip", "miktex") + Write-Output "Setting up MiKTeX" + .\miktex\miktexsetup.exe --remote-package-repository="$env:CTAN_MIRROR" --local-package-repository=./miktex/download --package-set=essential --quiet download ; Check-Output $? + Write-Output "Installing MiKTeX" + .\miktex\download\miktexsetup.exe --remote-package-repository="$env:CTAN_MIRROR" --portable="$env:R_LIB_PATH/miktex" --quiet install ; Check-Output $? + Write-Output "Done installing MiKTeX" + + initexmf --set-config-value [MPM]AutoInstall=1 + conda install -q -y --no-deps pandoc +} + +Add-Content .Renviron "R_LIBS=$env:R_LIB_PATH" + +Write-Output "Installing dependencies" +$packages = "c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" +Rscript --vanilla -e "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH')" ; Check-Output $? + +Write-Output "Building R package" +Rscript build_r.R --skip-install ; Check-Output $? 
+ +$PKG_FILE_NAME = Get-Item *.tar.gz +$LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + +$env:_R_CHECK_FORCE_SUGGESTS_ = 0 +if ($env:COMPILER -ne "MINGW") { + Write-Output "Running R CMD check without checking documentation" + R.exe CMD check --no-multiarch --no-examples --no-manual --ignore-vignettes ${PKG_FILE_NAME} ; $check_succeeded = $? +} else { + Write-Output "Running R CMD check as CRAN" + R.exe CMD check --no-multiarch --as-cran ${PKG_FILE_NAME} ; $check_succeeded = $? +} + +Write-Output "R CMD check build logs:" +Get-Content -Path $env:BUILD_SOURCESDIRECTORY\lightgbm.Rcheck\00install.out + +Check-Output $check_succeeded + +Write-Output "Looking for issues with R CMD check results" +if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "WARNING" -Quiet) { + echo "WARNINGS have been found by R CMD check!" + Check-Output $False +} + +$note_str = Get-Content "${LOG_FILE_NAME}" | Select-String -Pattern ' NOTE' | Out-String ; Check-Output $? +$relevant_line = $note_str -match '.*Status: (\d+) NOTE.*' +$NUM_CHECK_NOTES = $matches[1] +$ALLOWED_CHECK_NOTES = 3 +if ([int]$NUM_CHECK_NOTES -gt $ALLOWED_CHECK_NOTES) { + Write-Output "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed" + Check-Output $False +} + +Write-Output "No issues were found checking the R package" diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 82849577152d..fd0e9f95a5e4 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -12,6 +12,11 @@ if (Test-Path env:APPVEYOR) { $env:BUILD_SOURCESDIRECTORY = $env:APPVEYOR_BUILD_FOLDER } +if ($env:TASK -eq "r-package") { + & $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $? + Exit 0 +} + # setup for Python conda init powershell conda activate diff --git a/.vsts-ci.yml b/.vsts-ci.yml index a39dbc0e105b..a533ab1659e4 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -22,7 +22,7 @@ jobs: vmImage: 'ubuntu-latest' container: ubuntu1404 strategy: - maxParallel: 6 + maxParallel: 7 matrix: regular: TASK: regular @@ -76,7 +76,7 @@ jobs: pool: vmImage: 'macOS-10.14' strategy: - maxParallel: 3 + maxParallel: 4 matrix: regular: TASK: regular @@ -117,8 +117,11 @@ jobs: pool: vmImage: 'vs2017-win2016' strategy: - maxParallel: 3 + maxParallel: 4 matrix: + r_package: + TASK: r-package + COMPILER: MINGW regular: TASK: regular PYTHON_VERSION: 3.6 diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 7aef39a1f6de..14db7ba98d6c 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -36,6 +36,7 @@ export(saveRDS.lgb.Booster) export(setinfo) export(slice) import(methods) +importFrom(Matrix,Matrix) importFrom(R6,R6Class) importFrom(data.table,":=") importFrom(data.table,as.data.table) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 9459ccc4d5c4..12640ecfb0af 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -178,6 +178,7 @@ NULL # Various imports #' @import methods +#' @importFrom Matrix Matrix #' @importFrom R6 R6Class #' @useDynLib lib_lightgbm , .registration = TRUE NULL diff --git a/R-package/src/cmake/modules/FindLibR.cmake b/R-package/src/cmake/modules/FindLibR.cmake index 20c0a974e1ff..ad26f06dfeef 100644 --- a/R-package/src/cmake/modules/FindLibR.cmake +++ b/R-package/src/cmake/modules/FindLibR.cmake @@ -174,17 +174,31 @@ execute_process( OUTPUT_VARIABLE LIBR_LIB_DIR ) +set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory") +set(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH "R executable") +set(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH "R 
include directory") +set(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH "R shared libraries directory") + +# where is R.so / R.dll / libR.so likely to be found? +set(LIBR_PATH_HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_LIB_DIR}" "${LIBR_HOME}/bin/${R_ARCH}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}") + # look for the core R library find_library( LIBR_CORE_LIBRARY - NAMES R - HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_LIB_DIR}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}" + NAMES R R.dll + HINTS ${LIBR_PATH_HINTS} ) -set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory") -set(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH "R executable") -set(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH "R include directory") -set(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH "R shared libraries directory") +# starting from CMake 3.17, find_library() will not find .dll files by default +# https://cmake.org/cmake/help/v3.17/release/3.17.html#other-changes +if (WIN32 AND NOT LIBR_CORE_LIBRARY) + find_file( + LIBR_CORE_LIBRARY + NAME R.dll + HINTS ${LIBR_PATH_HINTS} + ) +endif() + set(LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY} CACHE PATH "R core shared library") if(WIN32 AND MSVC) diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index a79c93d846b7..339e8417ee70 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -67,6 +67,7 @@ if (!use_precompile) { # Check if Windows installation (for gcc vs Visual Studio) if (WINDOWS) { if (use_mingw) { + print("Trying to build with MinGW") cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") build_cmd <- "mingw32-make.exe _lightgbm" system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools