From 5e320a31b44bb2cc0b5ab3b82269d61899a083d3 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 28 Aug 2021 00:19:25 -0500
Subject: [PATCH 1/4] [R-package] fix warnings in demos

---
 .gitignore                                   |  3 +-
 R-package/demo/basic_walkthrough.R           | 49 +++++++-------------
 R-package/demo/early_stopping.R              |  2 +-
 R-package/demo/leaf_stability.R              | 31 +++++++++----
 R-package/demo/multiclass_custom_objective.R | 26 ++++++-----
 R-package/demo/weight_param.R                | 12 ++---
 6 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5a90094850b9..96e0700f4f49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -326,7 +326,7 @@ coverage.xml
 .hypothesis/
 **/coverage.html
 **/coverage.html.zip
-R-package/tests/testthat/Rplots.pdf
+**/Rplots.pdf
 
 # Translations
 *.mo
@@ -427,6 +427,7 @@ miktex*.zip
 *.def
 
 # Files created by examples and tests
+*.buffer
 **/lgb-Dataset.data
 **/lgb.Dataset.data
 **/model.txt
diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R
index 6716bb894840..3dc672e11d73 100644
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -12,6 +12,14 @@ test <- agaricus.test
 class(train$label)
 class(train$data)
 
+# Set parameters for model training
+train_params <- list(
+    num_leaves = 4L
+    , learning_rate = 1.0
+    , objective = "binary"
+    , nthread = 2L
+)
+
 #--------------------Basic Training using lightgbm----------------
 # This is the basic usage of lightgbm you can put matrix in data field
 # Note: we are putting in sparse matrix here, lightgbm naturally handles sparse input
@@ -19,22 +27,18 @@ class(train$data)
 print("Training lightgbm with sparseMatrix")
 bst <- lightgbm(
     data = train$data
+    , params = train_params
     , label = train$label
-    , num_leaves = 4L
-    , learning_rate = 1.0
     , nrounds = 2L
-    , objective = "binary"
 )
 
 # Alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("Training lightgbm with Matrix")
 bst <- lightgbm(
     data = as.matrix(train$data)
+    , params = train_params
     , label = train$label
-    , num_leaves = 4L
-    , learning_rate = 1.0
     , nrounds = 2L
-    , objective = "binary"
 )
 
 # You can also put in lgb.Dataset object, which stores label, data and other meta datas needed for advanced features
@@ -45,42 +49,32 @@ dtrain <- lgb.Dataset(
 )
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , objective = "binary"
 )
 
 # Verbose = 0,1,2
 print("Train lightgbm with verbose 0, no message")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , objective = "binary"
     , verbose = 0L
 )
 
 print("Train lightgbm with verbose 1, print evaluation metric")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , nthread = 2L
-    , objective = "binary"
     , verbose = 1L
 )
 
 print("Train lightgbm with verbose 2, also print information about tree")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , nthread = 2L
-    , objective = "binary"
     , verbose = 2L
 )
 
@@ -126,25 +120,19 @@ valids <- list(train = dtrain, test = dtest)
 print("Train lightgbm using lgb.train with valids")
 bst <- lgb.train(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
-    , nthread = 2L
-    , objective = "binary"
 )
 
 # We can change evaluation metrics, or use multiple evaluation metrics
 print("Train lightgbm using lgb.train with valids, watch logloss and error")
 bst <- lgb.train(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
     , eval = c("binary_error", "binary_logloss")
-    , nthread = 2L
-    , objective = "binary"
 )
 
 # lgb.Dataset can also be saved using lgb.Dataset.save
@@ -154,12 +142,9 @@ lgb.Dataset.save(dtrain, "dtrain.buffer")
 dtrain2 <- lgb.Dataset("dtrain.buffer")
 bst <- lgb.train(
     data = dtrain2
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
-    , nthread = 2L
-    , objective = "binary"
 )
 
 # information can be extracted from lgb.Dataset using getinfo
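The basic_walkthrough.R changes above all apply one pattern: model parameters move out of separate top-level arguments and into a single list shared by every call. A minimal sketch of the resulting usage (not part of the patch; it assumes the agaricus dataset bundled with the lightgbm package):

```r
library(lightgbm)
data(agaricus.train, package = "lightgbm")

# Declare model parameters once...
train_params <- list(
    num_leaves = 4L
    , learning_rate = 1.0
    , objective = "binary"
    , nthread = 2L
)

# ...then pass the whole list through `params` in each training call,
# instead of spelling out num_leaves, learning_rate, etc. one by one.
bst <- lightgbm(
    data = agaricus.train$data
    , params = train_params
    , label = agaricus.train$label
    , nrounds = 2L
)
```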
diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R
index f68b82dc3dcb..fa8abce38b08 100644
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@@ -46,7 +46,7 @@ bst <- lgb.train(
     , dtrain
     , num_round
     , valids
-    , objective = logregobj
+    , obj = logregobj
     , eval = evalerror
     , early_stopping_round = 3L
 )
diff --git a/R-package/demo/leaf_stability.R b/R-package/demo/leaf_stability.R
index bad2e83107b1..af1c533ac5b1 100644
--- a/R-package/demo/leaf_stability.R
+++ b/R-package/demo/leaf_stability.R
@@ -85,18 +85,21 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 
 # setup parameters and we train a model
-params <- list(objective = "regression", metric = "l2")
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 0.1
+    , bagging_fraction = 0.1
+    , bagging_freq = 1L
+    , bagging_seed = 1L
+)
 valids <- list(test = dtest)
 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 0.1
-    , bagging_fraction = 0.1
-    , bagging_freq = 1L
-    , bagging_seed = 1L
 )
 
 # We create a data.frame with the following structure:
@@ -141,13 +144,17 @@ table(new_data$binned)
 .depth_density_plot(df = new_data)
 
 # Now, let's show with other parameters
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model2 <- lgb.train(
     params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
 )
 
 # We create the data structure, but for model2
@@ -193,13 +200,17 @@ table(new_data2$binned)
 .depth_density_plot(df = new_data2)
 
 # Now, try with very severe overfitting
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model3 <- lgb.train(
     params
     , dtrain
     , 1000L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
 )
 
 # We create the data structure, but for model3
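The one-word early_stopping.R fix above matters because `lgb.train()` treats `obj` and `objective` differently: a custom objective function is passed through the `obj` argument, while `objective` names a built-in objective. A hedged, self-contained sketch of that distinction (the gradient/hessian pair mirrors the demo's logistic objective but is not copied from the patch):

```r
library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

# Custom objective: binary logistic loss, expressed as the gradient and
# hessian of the loss with respect to the raw scores.
logregobj <- function(preds, dtrain) {
    labels <- getinfo(dtrain, "label")
    preds <- 1.0 / (1.0 + exp(-preds))
    grad <- preds - labels
    hess <- preds * (1.0 - preds)
    return(list(grad = grad, hess = hess))
}

# The function goes through `obj`; `objective` is reserved for the names
# of built-in objectives supplied via the params list.
bst <- lgb.train(
    params = list(num_leaves = 4L, learning_rate = 1.0)
    , data = dtrain
    , nrounds = 3L
    , obj = logregobj
)
```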
diff --git a/R-package/demo/multiclass_custom_objective.R b/R-package/demo/multiclass_custom_objective.R
index ec2ed90cdf64..f7625df0afe5 100644
--- a/R-package/demo/multiclass_custom_objective.R
+++ b/R-package/demo/multiclass_custom_objective.R
@@ -20,18 +20,20 @@ valids <- list(train = dtrain, test = dtest)
 # Method 1 of training with built-in multiclass objective
 # Note: need to turn off boost from average to match custom objective
 # (https://github.com/microsoft/LightGBM/issues/1846)
+params <- list(
+    min_data = 1L
+    , learning_rate = 1.0
+    , num_class = 3L
+    , boost_from_average = FALSE
+    , metric = "multi_logloss"
+)
 model_builtin <- lgb.train(
-    list()
+    params = params
     , dtrain
-    , boost_from_average = FALSE
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
-    , objective = "multiclass"
-    , metric = "multi_logloss"
-    , num_class = 3L
+    , obj = "multiclass"
 )
 
 preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
@@ -92,17 +94,19 @@ custom_multiclass_metric <- function(preds, dtrain) {
     ))
 }
 
+params <- list(
+    min_data = 1L
+    , learning_rate = 1.0
+    , num_class = 3L
+)
 model_custom <- lgb.train(
     list()
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
-    , objective = custom_multiclass_obj
+    , obj = custom_multiclass_obj
     , eval = custom_multiclass_metric
-    , num_class = 3L
 )
 
 preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
diff --git a/R-package/demo/weight_param.R b/R-package/demo/weight_param.R
index 461b8caa79be..6eb5ce7aa87e 100644
--- a/R-package/demo/weight_param.R
+++ b/R-package/demo/weight_param.R
@@ -34,14 +34,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_raate = 1.0
 )
 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )
 weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -58,14 +58,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_rate = 1.0
 )
 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )
 small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -94,14 +94,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_rate = 1.0
 )
 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )
 large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)

From 0cd9f1b0fdb1b677f180058c75dc7edb44ff613c Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 28 Aug 2021 22:21:47 +0100
Subject: [PATCH 2/4] Apply suggestions from code review

Co-authored-by: Nikita Titov
---
 R-package/demo/multiclass_custom_objective.R | 4 ++--
 R-package/demo/weight_param.R                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R-package/demo/multiclass_custom_objective.R b/R-package/demo/multiclass_custom_objective.R
index f7625df0afe5..70d5c6ce3f90 100644
--- a/R-package/demo/multiclass_custom_objective.R
+++ b/R-package/demo/multiclass_custom_objective.R
@@ -28,7 +28,7 @@ params <- list(
     , metric = "multi_logloss"
 )
 model_builtin <- lgb.train(
-    params = params
+    params
     , dtrain
     , 100L
     , valids
@@ -100,7 +100,7 @@ params <- list(
     , num_class = 3L
 )
 model_custom <- lgb.train(
-    list()
+    params
     , dtrain
     , 100L
     , valids
diff --git a/R-package/demo/weight_param.R b/R-package/demo/weight_param.R
index 6eb5ce7aa87e..9702de41ece9 100644
--- a/R-package/demo/weight_param.R
+++ b/R-package/demo/weight_param.R
@@ -35,7 +35,7 @@ params <- list(
     , max_depth = 3L
     , nthread = 1L
     , min_data = 1L
-    , learning_raate = 1.0
+    , learning_rate = 1.0
 )
 model <- lgb.train(
     params
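Patch 2 is small but load-bearing: `lgb.train(list(), ...)` in the custom-objective demo meant the carefully built `params` list was never used, and the misspelled `learning_raate` key is one LightGBM cannot act on. A condensed sketch of the corrected weight_param.R shape (the toy weighted data is invented for illustration, not taken from the patch):

```r
library(lightgbm)

# Toy weighted dataset, invented for illustration.
set.seed(1L)
x <- matrix(rnorm(200L), ncol = 2L)
y <- as.numeric(x[, 1L] + x[, 2L] > 0)
weights <- rep(c(1.0, 0.5), each = 50L)
dtrain <- lgb.Dataset(x, label = y, weight = weights)

params <- list(
    objective = "regression"
    , metric = "l2"
    , num_leaves = 7L
    , max_depth = 3L
    , nthread = 1L
    , min_data = 1L
    , learning_rate = 1.0  # a misspelling like `learning_raate` would never take effect
)
# The list is passed positionally as the first argument; `params = params`
# works too, but the reviewed style is simply `params`.
model <- lgb.train(params, dtrain, 10L)
```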
From 3838d33e9c0a8c090838ed432ff167ee09b20720 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 28 Aug 2021 16:28:47 -0500
Subject: [PATCH 3/4] fix additional params issues in multiclass and
 categorical_feature examples

---
 R-package/demo/categorical_features_rules.R |  1 -
 R-package/demo/multiclass.R                 | 24 +++++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/R-package/demo/categorical_features_rules.R b/R-package/demo/categorical_features_rules.R
index 68df09f24f3b..97af5a7896b5 100644
--- a/R-package/demo/categorical_features_rules.R
+++ b/R-package/demo/categorical_features_rules.R
@@ -85,7 +85,6 @@ params <- list(
     , metric = "l2"
     , min_data = 1L
     , learning_rate = 0.1
-    , min_data = 0L
     , min_hessian = 1.0
     , max_depth = 2L
 )
diff --git a/R-package/demo/multiclass.R b/R-package/demo/multiclass.R
index 00b49e83f6de..7fbd20ea504b 100644
--- a/R-package/demo/multiclass.R
+++ b/R-package/demo/multiclass.R
@@ -18,14 +18,18 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5
 valids <- list(test = dtest)
 
 # Method 1 of training
-params <- list(objective = "multiclass", metric = "multi_error", num_class = 3L)
+params <- list(
+    objective = "multiclass"
+    , metric = "multi_error"
+    , num_class = 3L
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model <- lgb.train(
     params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )
 
@@ -34,18 +38,20 @@ model <- lgb.train(
 my_preds <- predict(model, test[, 1L:4L])
 
 # Method 2 of training, identical
-model <- lgb.train(
-    list()
-    , dtrain
-    , 100L
-    , valids
-    , min_data = 1L
+params <- list(
+    min_data = 1L
     , learning_rate = 1.0
     , early_stopping_rounds = 10L
     , objective = "multiclass"
     , metric = "multi_error"
     , num_class = 3L
 )
+model <- lgb.train(
+    params
+    , dtrain
+    , 100L
+    , valids
+)
 
 # We can predict on test data, identical
 my_preds <- predict(model, test[, 1L:4L])
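Patch 3 removes a `min_data` key that appeared twice with conflicting values in the categorical-features demo and gives multiclass.R the same params-list treatment. A runnable condensation of the multiclass setup, using iris just as the demo does (a sketch, not an excerpt from the patch):

```r
library(lightgbm)
data(iris)

# Recode the factor label to the 0/1/2 integers LightGBM expects.
x <- as.matrix(iris[, 1L:4L])
y <- as.integer(iris$Species) - 1L
dtrain <- lgb.Dataset(x, label = y)

params <- list(
    objective = "multiclass"
    , metric = "multi_error"
    , num_class = 3L
    , min_data = 1L
    , learning_rate = 1.0
)
model <- lgb.train(params, dtrain, 10L)

# One probability per class and row; returned as a flat vector here
# unless reshaped (e.g. with reshape = TRUE, as the demo's predictions do).
preds <- predict(model, x)
```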
From 35c6205269541366fb76735c8db6bba698eecbcc Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 28 Aug 2021 23:10:09 +0100
Subject: [PATCH 4/4] Update R-package/demo/multiclass.R

Co-authored-by: Nikita Titov
---
 R-package/demo/multiclass.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/demo/multiclass.R b/R-package/demo/multiclass.R
index 7fbd20ea504b..0f52fecc0b26 100644
--- a/R-package/demo/multiclass.R
+++ b/R-package/demo/multiclass.R
@@ -41,7 +41,6 @@ my_preds <- predict(model, test[, 1L:4L])
 # Method 2 of training, identical
 params <- list(
     min_data = 1L
     , learning_rate = 1.0
-    , early_stopping_rounds = 10L
     , objective = "multiclass"
     , metric = "multi_error"
     , num_class = 3L
@@ -51,6 +50,7 @@ model <- lgb.train(
     , dtrain
     , 100L
     , valids
+    , early_stopping_rounds = 10L
 )
 
 # We can predict on test data, identical
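Patch 4's point is that `early_stopping_rounds` reads most naturally as an argument of the training call, and early stopping only does anything when `valids` supplies at least one validation set to monitor. A final hedged sketch of that arrangement, again using the bundled agaricus data rather than code from the patch:

```r
library(lightgbm)
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")

dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset.create.valid(dtrain, agaricus.test$data, label = agaricus.test$label)

params <- list(objective = "binary", metric = "binary_logloss")

# Stops when the validation metric has not improved for 10 rounds;
# without `valids` there is nothing to monitor and no early stop occurs.
model <- lgb.train(
    params
    , dtrain
    , 100L
    , valids = list(test = dtest)
    , early_stopping_rounds = 10L
)
print(model$best_iter)
```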