From 5026ea7aa4baaeff10562bbbb07191a7aa5720c9 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sat, 6 Nov 2021 16:03:41 +0100 Subject: [PATCH 1/4] [R-package] [docs] add intro vignette (#3946) --- .ci/lint_r_code.R | 2 +- .ci/test_r_package.sh | 6 +- .ci/test_r_package_solaris.sh | 4 +- .ci/test_r_package_valgrind.sh | 2 +- .ci/test_r_package_windows.ps1 | 11 ++- .github/workflows/r_package.yml | 4 +- .github/workflows/r_valgrind.yml | 2 +- .github/workflows/static_analysis.yml | 2 +- .vsts-ci.yml | 3 +- R-package/DESCRIPTION | 3 + R-package/README.md | 6 ++ R-package/pkgdown/_pkgdown.yml | 2 + R-package/vignettes/basic_walkthrough.Rmd | 115 ++++++++++++++++++++++ build-cran-package.sh | 61 +++++++++++- build_r.R | 10 +- docs/conf.py | 2 + 16 files changed, 218 insertions(+), 17 deletions(-) create mode 100644 R-package/vignettes/basic_walkthrough.Rmd diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index cc83cb7c1cc9..d477a1a70b9c 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -8,7 +8,7 @@ SOURCE_DIR <- args[[1L]] FILES_TO_LINT <- list.files( path = SOURCE_DIR - , pattern = "\\.r$" + , pattern = "\\.r$|\\.rmd$" , all.files = TRUE , ignore.case = TRUE , full.names = TRUE diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index a6656dfb83e6..aec25234c9fd 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -92,13 +92,13 @@ if [[ $OS_NAME == "macos" ]]; then fi fi -# Manually install Depends and Imports libraries + 'testthat' +# Manually install Depends and Imports libraries + 'knitr', 'rmarkdown', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) # NOTE: testthat is not required when running rchk if [[ "${TASK}" == "r-rchk" ]]; then - packages="c('data.table', 'jsonlite', 'Matrix', 'R6')" + packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'rmarkdown')" else - packages="c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat')" + packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'rmarkdown', 'testthat')" fi compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then diff --git a/.ci/test_r_package_solaris.sh b/.ci/test_r_package_solaris.sh index 1daabf08d91a..18ed6cb2f7ad 100755 --- a/.ci/test_r_package_solaris.sh +++ b/.ci/test_r_package_solaris.sh @@ -5,7 +5,9 @@ apt-get install --no-install-recommends -y \ libxml2-dev \ libssl-dev -Rscript -e "install.packages('rhub', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 +# installation of dependencies needs to happen before building the package, +# since `R CMD build` needs to install the package to build vignettes +Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'rhub', 'testthat'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 sh build-cran-package.sh || exit -1 diff --git a/.ci/test_r_package_valgrind.sh b/.ci/test_r_package_valgrind.sh index fc27689f9090..e7a6cb027d2d 100755 --- a/.ci/test_r_package_valgrind.sh +++ b/.ci/test_r_package_valgrind.sh @@ -1,6 +1,6 @@ #!/bin/bash -RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 +RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = 
parallel::detectCores())" || exit -1 sh build-cran-package.sh \ --r-executable=RDvalgrind \ || exit -1 diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 index 3c0f11ca58c6..3045937c0f2b 100644 --- a/.ci/test_r_package_windows.ps1 +++ b/.ci/test_r_package_windows.ps1 @@ -122,7 +122,7 @@ Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT Write-Output "Done installing Rtools" Write-Output "Installing dependencies" -$packages = "c('data.table', 'jsonlite', 'Matrix', 'processx', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" +$packages = "c('data.table', 'jsonlite', 'knitr', 'Matrix', 'processx', 'R6', 'rmarkdown', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $? # MiKTeX and pandoc can be skipped on non-MinGW builds, since we don't @@ -165,7 +165,16 @@ if ($env:COMPILER -ne "MSVC") { } Run-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Check-Output $? } elseif ($env:R_BUILD_TYPE -eq "cran") { + # NOTE: gzip and tar are needed to create a CRAN package on Windows, but + # some flavors of tar.exe can fail in some settings on Windows. + # Putting the msys64 utilities at the beginning of PATH temporarily to be + # sure they're used for that purpose. + # $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + if ($env:R_MAJOR_VERSION -eq "3") { + $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + } Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Check-Output $? + Remove-From-Path ".*msys64.*" # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM # git repo around. 
This is to protect against the use of relative paths diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 8746297ba159..bff33548ce1d 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -194,7 +194,7 @@ jobs: - name: Install packages shell: bash run: | - RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" + RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }} RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit -1 - name: Run tests with sanitizers @@ -225,7 +225,7 @@ jobs: shell: bash run: | export PATH=/opt/R-devel/bin/:${PATH} - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" sh build-cran-package.sh R CMD check --as-cran --run-donttest lightgbm_*.tar.gz || exit -1 if grep -q -E "NOTE|WARNING|ERROR" lightgbm.Rcheck/00check.log; then diff --git a/.github/workflows/r_valgrind.yml b/.github/workflows/r_valgrind.yml index d2e2fc0c2be9..56b9dac031fe 100644 --- a/.github/workflows/r_valgrind.yml +++ b/.github/workflows/r_valgrind.yml @@ -7,7 +7,7 @@ on: jobs: test-r-valgrind: name: r-package (ubuntu-latest, R-devel, valgrind) - timeout-minutes: 180 + timeout-minutes: 240 runs-on: ubuntu-latest container: wch1/r-debug env: diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 77be3c89eae4..f631dfa89350 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -57,7 +57,7 @@ jobs: - name: Install packages shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'roxygen2', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'roxygen2', 'testthat', 'knitr', 'rmarkdown'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" sh build-cran-package.sh || exit -1 R CMD INSTALL --with-keep.source lightgbm_*.tar.gz || exit -1 - name: Test documentation diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 059ebf9d35b4..e3f350eba594 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -19,7 +19,7 @@ resources: image: 'ubuntu:latest' options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro" - container: rbase - image: rocker/r-base + image: wch1/r-debug jobs: ########################################### - job: Linux @@ -300,6 +300,7 @@ jobs: steps: - script: | LGB_VER=$(head -n 1 VERSION.txt | sed "s/rc/-/g") + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 sh build-cran-package.sh || exit -1 mv lightgbm_${LGB_VER}.tar.gz $(Build.ArtifactStagingDirectory)/lightgbm-${LGB_VER}-r-cran.tar.gz displayName: 'Build CRAN R-package' diff --git 
a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 490ca15b306a..e7993850f7c6 100755
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -45,8 +45,11 @@ URL: https://github.com/Microsoft/LightGBM
 BugReports: https://github.com/Microsoft/LightGBM/issues
 NeedsCompilation: yes
 Biarch: true
+VignetteBuilder: knitr
 Suggests:
+    knitr,
     processx,
+    rmarkdown,
     testthat
 Depends:
     R (>= 3.5),
diff --git a/R-package/README.md b/R-package/README.md
index 352071f0ad4b..d80a7f188025 100644
--- a/R-package/README.md
+++ b/R-package/README.md
@@ -152,6 +152,7 @@ Rscript build_r.R
 
 The `build_r.R` script builds the package in a temporary directory called `lightgbm_r`. It will destroy and recreate that directory each time you run the script. That script supports the following command-line options:
 
+- `--no-build-vignettes`: Skip building vignettes.
 - `--skip-install`: Build the package tarball, but do not install it.
 - `--use-gpu`: Build a GPU-enabled version of the library.
 - `--use-mingw`: Force the use of MinGW toolchain, regardless of R version.
@@ -266,6 +267,11 @@ sh build-cran-package.sh
 
 This will create a file `lightgbm_${VERSION}.tar.gz`, where `VERSION` is the version of `LightGBM`.
 
+That script supports the following command-line options:
+
+- `--no-build-vignettes`: Skip building vignettes.
+- `--r-executable=[path-to-executable]`: Use an alternative build of R.
+
 Also, CRAN package is generated with every commit to any repo's branch and can be found in "Artifacts" section of the associated Azure Pipelines run.
 
 ### Standard Installation from CRAN Package
diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml
index b89ab96f3cd9..9e105d2c6bb5 100644
--- a/R-package/pkgdown/_pkgdown.yml
+++ b/R-package/pkgdown/_pkgdown.yml
@@ -41,6 +41,8 @@ navbar:
       href: ../
     - icon: fa-home fa-lg
       href: index.html
+    - text: Articles
+      href: articles/index.html
     - text: Reference
       href: reference/index.html
   right:
diff --git a/R-package/vignettes/basic_walkthrough.Rmd b/R-package/vignettes/basic_walkthrough.Rmd
new file mode 100644
index 000000000000..f74f214e659f
--- /dev/null
+++ b/R-package/vignettes/basic_walkthrough.Rmd
@@ -0,0 +1,115 @@
+---
+title:
+  "Basic Walkthrough"
+description: >
+  This vignette describes how to train a LightGBM model for binary classification.
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Basic Walkthrough}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE
+  , comment = "#>"
+  , warning = FALSE
+  , message = FALSE
+)
+```
+
+## Introduction
+
+Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017).
+
+```{r setup}
+library(lightgbm)
+```
+
+This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed to a term deposit.
+
+## The dataset
+
+The dataset looks as follows.
+
+```{r}
+data(bank, package = "lightgbm")
+
+bank[1L:5L, c("y", "age", "balance")]
+
+# Distribution of the response
+table(bank$y)
+```
+
+## Training the model
+
+The R package of LightGBM offers two functions to train a model:
+
+- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function.
+- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`.
+
+### Using the `lightgbm()` function
+
+As a first step, you need to convert the data to numeric. Afterwards, you are ready to fit the model with the `lightgbm()` function.
+
+```{r}
+# Numeric response and feature matrix
+y <- as.numeric(bank$y == "yes")
+X <- data.matrix(bank[, c("age", "balance")])
+
+# Train
+fit <- lightgbm(
+  data = X
+  , label = y
+  , num_leaves = 4L
+  , learning_rate = 1.0
+  , nrounds = 10L
+  , objective = "binary"
+  , verbose = -1L
+)
+
+# Result
+summary(predict(fit, X))
+```
+
+It seems to have worked! And the predictions are indeed probabilities between 0 and 1.
+
+### Using the `lgb.train()` function
+
+Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` with LightGBM's data API, `lgb.Dataset()`. Parameters are passed to `lgb.train()` as a named list.
+
+```{r}
+# Data interface
+dtrain <- lgb.Dataset(X, label = y)
+
+# Parameters
+params <- list(
+  objective = "binary"
+  , num_leaves = 4L
+  , learning_rate = 1.0
+)
+
+# Train
+fit <- lgb.train(
+  params
+  , data = dtrain
+  , nrounds = 10L
+  , verbose = -1L
+)
+```
+
+Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details.
+
+```{r, echo = FALSE, results = "hide"}
+# Cleanup
+if (file.exists("lightgbm.model")) {
+  file.remove("lightgbm.model")
+}
+```
+
+## References
+
+Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (NIPS 2017).
+
+Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31.
diff --git a/build-cran-package.sh b/build-cran-package.sh
index e4def4a14657..704edfbcc63e 100755
--- a/build-cran-package.sh
+++ b/build-cran-package.sh
@@ -11,6 +11,11 @@
 # non-standard builds of R, such as those provided in
 # https://github.com/wch/r-debug.
 #
+# --no-build-vignettes      Pass this flag to skip creating vignettes.
+#                           You might want to do this to avoid installing
+#                           vignette-only dependencies, or to avoid
+#                           portability issues.
+#
 # [usage]
 #
 #     # default usage
 #     sh build-cran-package.sh
@@ -18,9 +23,14 @@
 #
 #     # custom R build
 #     sh build-cran-package.sh --r-executable=RDvalgrind
+#
+#     # skip vignette building
+#     sh build-cran-package.sh --no-build-vignettes
 
 set -e
 
+# Default values of arguments
+BUILD_VIGNETTES=true
 LGB_R_EXECUTABLE=R
 
 while [ $# -gt 0 ]; do
@@ -28,6 +38,9 @@
     --r-executable=*)
         LGB_R_EXECUTABLE="${1#*=}"
         ;;
+    --no-build-vignettes)
+        BUILD_VIGNETTES=false
+        ;;
     *)
         echo "invalid argument '${1}'"
         exit -1
@@ -57,6 +70,10 @@
 cp -R R-package/* "${TEMP_R_DIR}"
 cp -R include "${TEMP_R_DIR}/src/"
 cp -R src/* "${TEMP_R_DIR}/src/"
+if ${BUILD_VIGNETTES} ; then
+    cp docs/logo/LightGBM_logo_black_text.svg "${TEMP_R_DIR}/vignettes/"
+fi
+
 cp \
     external_libs/fast_double_parser/include/fast_double_parser.h \
     "${TEMP_R_DIR}/src/include/LightGBM"
@@ -169,8 +186,46 @@ cd "${TEMP_R_DIR}"
 
 cd "${ORIG_WD}"
 
-"${LGB_R_EXECUTABLE}" CMD build \
-    --keep-empty-dirs \
-    lightgbm_r
+if ${BUILD_VIGNETTES} ; then
+    "${LGB_R_EXECUTABLE}" CMD build \
+        --keep-empty-dirs \
+        lightgbm_r
+
+    echo "removing object files created by vignettes"
+    rm -rf ./_tmp
+    mkdir _tmp
+    TARBALL_NAME="lightgbm_${LGB_VERSION}.tar.gz"
+    mv "${TARBALL_NAME}" _tmp/
+
+    echo "untarring ${TARBALL_NAME}"
+    cd _tmp
+        tar -xvf "${TARBALL_NAME}" > /dev/null 2>&1
+        rm -rf "${TARBALL_NAME}"
+    cd ..
+    echo "done untarring ${TARBALL_NAME}"
+
+    echo "re-tarring ${TARBALL_NAME}"
+    tar \
+        -czv \
+        -C ./_tmp \
+        --exclude=*.a \
+        --exclude=*.dll \
+        --exclude=*.o \
+        --exclude=*.so \
+        --exclude=*.tar.gz \
+        --exclude=**/conftest.c \
+        --exclude=**/conftest.exe \
+        -f "${TARBALL_NAME}" \
+        lightgbm \
+        > /dev/null 2>&1
+    echo "Done creating ${TARBALL_NAME}"
+
+    rm -rf ./_tmp
+else
+    "${LGB_R_EXECUTABLE}" CMD build \
+        --keep-empty-dirs \
+        --no-build-vignettes \
+        lightgbm_r
+fi
 
 echo "Done building R package"
diff --git a/build_r.R b/build_r.R
index aa16c6fcdc6c..dc3f61e6efa1 100644
--- a/build_r.R
+++ b/build_r.R
@@ -36,6 +36,7 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
 }
 
 parsed_args <- .parse_args(args)
+SKIP_VIGNETTES <- "--no-build-vignettes" %in% parsed_args[["flags"]]
 USING_GPU <- "--use-gpu" %in% parsed_args[["flags"]]
 USING_MINGW <- "--use-mingw" %in% parsed_args[["flags"]]
 USING_MSYS2 <- "--use-msys2" %in% parsed_args[["flags"]]
@@ -51,7 +52,8 @@ ARGS_TO_DEFINES <- c(
 )
 
 recognized_args <- c(
-    "--skip-install"
+    "--no-build-vignettes"
+    , "--skip-install"
     , "--use-gpu"
     , "--use-mingw"
     , "--use-msys2"
@@ -407,7 +409,11 @@
 # NOTE: --keep-empty-dirs is necessary to keep the deep paths expected
 #       by CMake while also meeting the CRAN req to create object files
 #       on demand
-.run_shell_command("R", c("CMD", "build", TEMP_R_DIR, "--keep-empty-dirs"))
+r_build_args <- c("CMD", "build", TEMP_R_DIR, "--keep-empty-dirs")
+if (isTRUE(SKIP_VIGNETTES)) {
+    r_build_args <- c(r_build_args, "--no-build-vignettes")
+}
+.run_shell_command("R", r_build_args)
 
 # Install the package
 version <- gsub(
diff --git a/docs/conf.py b/docs/conf.py
index b48ddce16c10..13751145d422 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -273,8 +273,10 @@ def generate_r_docs(app: Sphinx) -> None:
             r-base=4.1.0=hb67fd72_2 \
             r-data.table=1.14.0=r41hcfec24a_0 \
             r-jsonlite=1.7.2=r41hcfec24a_0 \
+            r-knitr=1.35=r41hc72bb7e_0 \
             r-matrix=1.3_4=r41he454529_0 \
             r-pkgdown=1.6.1=r41hc72bb7e_0 \
+            r-rmarkdown=2.11=r41hc72bb7e_0 \
             r-roxygen2=7.1.1=r41h03ef668_0
         source /home/docs/.conda/bin/activate r_env
         export TAR=/bin/tar
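For reference, the flag introduced above can be exercised from the repository root in any of the following ways. A minimal sketch, assuming a Unix-like shell (`RDvalgrind` is just an example of a custom R build, as in the script's own usage notes):

```sh
# default: build the CRAN tarball, vignettes included
sh build-cran-package.sh

# skip vignette building, e.g. to avoid installing knitr/rmarkdown
sh build-cran-package.sh --no-build-vignettes

# use a non-standard build of R
sh build-cran-package.sh --r-executable=RDvalgrind

# the same flag is recognized by the build_r.R entry point
Rscript build_r.R --no-build-vignettes
```

Note that when vignettes are built, `R CMD build` has to install the package and knit the `.Rmd` files, which is why the CI scripts above add `knitr` and `rmarkdown` to every dependency-installation step.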
From 40fb2e2f1982402798776ee44e4ec82fc4644d3d Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sun, 7 Nov 2021 12:03:30 -0600
Subject: [PATCH 2/4] add 10 test vignettes

---
 R-package/vignettes/test_vignette_1.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_10.Rmd | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_2.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_3.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_4.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_5.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_6.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_7.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_8.Rmd  | 115 +++++++++++++++++++++++
 R-package/vignettes/test_vignette_9.Rmd  | 115 +++++++++++++++++++++++
 10 files changed, 1150 insertions(+)
 create mode 100644 R-package/vignettes/test_vignette_1.Rmd
 create mode 100644 R-package/vignettes/test_vignette_10.Rmd
 create mode 100644 R-package/vignettes/test_vignette_2.Rmd
 create mode 100644 R-package/vignettes/test_vignette_3.Rmd
 create mode 100644 R-package/vignettes/test_vignette_4.Rmd
 create mode 100644 R-package/vignettes/test_vignette_5.Rmd
 create mode 100644 R-package/vignettes/test_vignette_6.Rmd
 create mode 100644 R-package/vignettes/test_vignette_7.Rmd
 create mode 100644 R-package/vignettes/test_vignette_8.Rmd
 create mode 100644 R-package/vignettes/test_vignette_9.Rmd

diff --git a/R-package/vignettes/test_vignette_1.Rmd b/R-package/vignettes/test_vignette_1.Rmd
new file mode 100644
index 000000000000..3db996f3cb18
--- /dev/null
+++ b/R-package/vignettes/test_vignette_1.Rmd
@@ -0,0 +1,115 @@
+---
+title:
+  "Test 1"
+description: >
+  This vignette describes how to train a LightGBM model for binary classification.
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Test 1}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE
+  , comment = "#>"
+  , warning = FALSE
+  , message = FALSE
+)
+```
+
+## Introduction
+
+Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017).
+
+```{r setup}
+library(lightgbm)
+```
+
+This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit.
+
+## The dataset
+
+The dataset looks as follows.
+
+```{r}
+data(bank, package = "lightgbm")
+
+bank[1L:5L, c("y", "age", "balance")]
+
+# Distribution of the response
+table(bank$y)
+```
+
+## Training the model
+
+The R package of LightGBM offers two functions to train a model:
+
+- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function.
+- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`.
+
+### Using the `lightgbm()` function
+
+In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function.
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_10.Rmd b/R-package/vignettes/test_vignette_10.Rmd new file mode 100644 index 000000000000..4792b14c900b --- /dev/null +++ b/R-package/vignettes/test_vignette_10.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 10" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 10} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_2.Rmd b/R-package/vignettes/test_vignette_2.Rmd new file mode 100644 index 000000000000..2e55c959aab6 --- /dev/null +++ b/R-package/vignettes/test_vignette_2.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 2" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 2} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_3.Rmd b/R-package/vignettes/test_vignette_3.Rmd new file mode 100644 index 000000000000..065658a330f5 --- /dev/null +++ b/R-package/vignettes/test_vignette_3.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 3" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 3} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_4.Rmd b/R-package/vignettes/test_vignette_4.Rmd new file mode 100644 index 000000000000..47c204a37a16 --- /dev/null +++ b/R-package/vignettes/test_vignette_4.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 4" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 4} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_5.Rmd b/R-package/vignettes/test_vignette_5.Rmd new file mode 100644 index 000000000000..3369c4f0b5f1 --- /dev/null +++ b/R-package/vignettes/test_vignette_5.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 5" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 5} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_6.Rmd b/R-package/vignettes/test_vignette_6.Rmd new file mode 100644 index 000000000000..65519f41b955 --- /dev/null +++ b/R-package/vignettes/test_vignette_6.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 6" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 6} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_7.Rmd b/R-package/vignettes/test_vignette_7.Rmd new file mode 100644 index 000000000000..a8cc44d4b1cb --- /dev/null +++ b/R-package/vignettes/test_vignette_7.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 7" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 7} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_8.Rmd b/R-package/vignettes/test_vignette_8.Rmd new file mode 100644 index 000000000000..a3f08ae78ad8 --- /dev/null +++ b/R-package/vignettes/test_vignette_8.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 8" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 8} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. diff --git a/R-package/vignettes/test_vignette_9.Rmd b/R-package/vignettes/test_vignette_9.Rmd new file mode 100644 index 000000000000..34ae60d15fc3 --- /dev/null +++ b/R-package/vignettes/test_vignette_9.Rmd @@ -0,0 +1,115 @@ +--- +title: + "Test 9" +description: > + This vignette describes how to train a LightGBM model for binary classification. +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Test 9} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE + , comment = "#>" + , warning = FALSE + , message = FALSE +) +``` + +## Introduction + +Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). + +```{r setup} +library(lightgbm) +``` + +This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. + +## The dataset + +The dataset looks as follows. + +```{r} +data(bank, package = "lightgbm") + +bank[1L:5L, c("y", "age", "balance")] + +# Distribution of the response +table(bank$y) +``` + +## Training the model + +The R package of LightGBM offers two functions to train a model: + +- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. +- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. + +### Using the `lightgbm()` function + +In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
+ +```{r} +# Numeric response and feature matrix +y <- as.numeric(bank$y == "yes") +X <- data.matrix(bank[, c("age", "balance")]) + +# Train +fit <- lightgbm( + data = X + , label = y + , num_leaves = 4L + , learning_rate = 1.0 + , nrounds = 10L + , objective = "binary" + , verbose = -1L +) + +# Result +summary(predict(fit, X)) +``` + +It seems to have worked! And the predictions are indeed probabilities between 0 and 1. + +### Using the `lgb.train()` function + +Alternatively, you can go for the more flexible interface `lgb.train()`. Here, as an additional step, you need to prepare `y` and `X` by the data API `lgb.Dataset()` of LightGBM. Parameters are passed to `lgb.train()` as a named list. + +```{r} +# Data interface +dtrain <- lgb.Dataset(X, label = y) + +# Parameters +params <- list( + objective = "binary" + , num_leaves = 4L + , learning_rate = 1.0 +) + +# Train +fit <- lgb.train( + params + , data = dtrain + , nrounds = 10L + , verbose = -1L +) +``` + +Try it out! If stuck, visit LightGBM's [documentation](https://lightgbm.readthedocs.io/en/latest/R/index.html) for more details. + +```{r, echo = FALSE, results = "hide"} +# Cleanup +if (file.exists("lightgbm.model")) { + file.remove("lightgbm.model") +} +``` + +## References + +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (Nip 2017). + +Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31. From 93a3726395d7e7c246d5f23f354068548aeaa97f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 7 Nov 2021 12:26:39 -0600 Subject: [PATCH 3/4] Revert "add 10 test vignettes" This reverts commit 40fb2e2f1982402798776ee44e4ec82fc4644d3d. 
--- R-package/vignettes/test_vignette_1.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_10.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_2.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_3.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_4.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_5.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_6.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_7.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_8.Rmd | 115 ----------------------- R-package/vignettes/test_vignette_9.Rmd | 115 ----------------------- 10 files changed, 1150 deletions(-) delete mode 100644 R-package/vignettes/test_vignette_1.Rmd delete mode 100644 R-package/vignettes/test_vignette_10.Rmd delete mode 100644 R-package/vignettes/test_vignette_2.Rmd delete mode 100644 R-package/vignettes/test_vignette_3.Rmd delete mode 100644 R-package/vignettes/test_vignette_4.Rmd delete mode 100644 R-package/vignettes/test_vignette_5.Rmd delete mode 100644 R-package/vignettes/test_vignette_6.Rmd delete mode 100644 R-package/vignettes/test_vignette_7.Rmd delete mode 100644 R-package/vignettes/test_vignette_8.Rmd delete mode 100644 R-package/vignettes/test_vignette_9.Rmd diff --git a/R-package/vignettes/test_vignette_1.Rmd b/R-package/vignettes/test_vignette_1.Rmd deleted file mode 100644 index 3db996f3cb18..000000000000 --- a/R-package/vignettes/test_vignette_1.Rmd +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: - "Test 1" -description: > - This vignette describes how to train a LightGBM model for binary classification. -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Test 1} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE - , comment = "#>" - , warning = FALSE - , message = FALSE -) -``` - -## Introduction - -Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), a highly efficient gradient boosting implementation (Ke et al. 2017). - -```{r setup} -library(lightgbm) -``` - -This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. - -## The dataset - -The dataset looks as follows. - -```{r} -data(bank, package = "lightgbm") - -bank[1L:5L, c("y", "age", "balance")] - -# Distribution of the response -table(bank$y) -``` - -## Training the model - -The R package of LightGBM offers two functions to train a model: - -- `lgb.train()`: This is the main training logic. It offers full flexibility but requires a `Dataset` object created by the `lgb.Dataset()` function. -- `lightgbm()`: Simpler, but less flexible. Data can be passed without having to bother with `lgb.Dataset()`. - -### Using the `lightgbm()` function - -In a first step, you need to convert data to numeric. Afterwards, you are ready to fit the model by the `lightgbm()` function. 
From e76a8d43e4b473b30c78e28c625f9d8221b06b9c Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sun, 14 Nov 2021 22:59:54 -0500
Subject: [PATCH 4/4] Apply suggestions from code review

Co-authored-by: Nikita Titov
---
 .ci/test_r_package_windows.ps1            | 3 +--
 .github/workflows/static_analysis.yml     | 2 +-
 .vsts-ci.yml                              | 2 +-
 R-package/README.md                       | 4 ++--
 R-package/vignettes/basic_walkthrough.Rmd | 2 +-
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1
index 8ccf74695765..62562b471345 100644
--- a/.ci/test_r_package_windows.ps1
+++ b/.ci/test_r_package_windows.ps1
@@ -169,9 +169,8 @@ if ($env:COMPILER -ne "MSVC") {
     # some flavors of tar.exe can fail in some settings on Windows.
     # Putting the msys64 utilities at the beginning of PATH temporarily to be
     # sure they're used for that purpose.
-    # $env:PATH = "C:\msys64\usr\bin;" + $env:PATH
     if ($env:R_MAJOR_VERSION -eq "3") {
-      $env:PATH = "C:\msys64\usr\bin;" + $env:PATH
+        $env:PATH = "C:\msys64\usr\bin;" + $env:PATH
     }
     Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Check-Output $?
Remove-From-Path ".*msys64.*" diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index f631dfa89350..123a93e2462b 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -57,7 +57,7 @@ jobs: - name: Install packages shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'roxygen2', 'testthat', 'knitr', 'rmarkdown'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'roxygen2', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" sh build-cran-package.sh || exit -1 R CMD INSTALL --with-keep.source lightgbm_*.tar.gz || exit -1 - name: Test documentation diff --git a/.vsts-ci.yml b/.vsts-ci.yml index e3f350eba594..ccf6245e3dc4 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -300,7 +300,7 @@ jobs: steps: - script: | LGB_VER=$(head -n 1 VERSION.txt | sed "s/rc/-/g") - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1 sh build-cran-package.sh || exit -1 mv lightgbm_${LGB_VER}.tar.gz $(Build.ArtifactStagingDirectory)/lightgbm-${LGB_VER}-r-cran.tar.gz displayName: 'Build CRAN R-package' diff --git a/R-package/README.md b/R-package/README.md index a9e32be27d93..a32ab91c1576 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -153,7 +153,7 @@ Rscript build_r.R The `build_r.R` script builds the package in a temporary directory called `lightgbm_r`. It will destroy and recreate that directory each time you run the script. That script supports the following command-line options: - `--no-build-vignettes`: Skip building vignettes. -- `-j[jobs]`: number of threads to use when compiling LightGBM. E.g., `-j4` will try to compile 4 objects at a time. +- `-j[jobs]`: Number of threads to use when compiling LightGBM. E.g., `-j4` will try to compile 4 objects at a time. - by default, this script uses single-thread compilation - for best results, set `-j` to the number of physical CPUs - `--skip-install`: Build the package tarball, but do not install it. @@ -273,7 +273,7 @@ This will create a file `lightgbm_${VERSION}.tar.gz`, where `VERSION` is the ver That script supports the following command-line options: - `--no-build-vignettes`: Skip building vignettes. -- `--r-executable=[path-to-executable]`: use an alternative build of R +- `--r-executable=[path-to-executable]`: Use an alternative build of R. Also, CRAN package is generated with every commit to any repo's branch and can be found in "Artifacts" section of the associated Azure Pipelines run. diff --git a/R-package/vignettes/basic_walkthrough.Rmd b/R-package/vignettes/basic_walkthrough.Rmd index f74f214e659f..bfdabde7f90e 100644 --- a/R-package/vignettes/basic_walkthrough.Rmd +++ b/R-package/vignettes/basic_walkthrough.Rmd @@ -110,6 +110,6 @@ if (file.exists("lightgbm.model")) { ## References -Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." 
In Advances in Neural Information Processing Systems 30 (Nip 2017). +Ke, Guolin, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. "LightGBM: A Highly Efficient Gradient Boosting Decision Tree." In Advances in Neural Information Processing Systems 30 (NIPS 2017). Moro, Sérgio, Paulo Cortez, and Paulo Rita. 2014. "A Data-Driven Approach to Predict the Success of Bank Telemarketing." Decision Support Systems 62: 22–31.
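The vignette's hidden cleanup chunk removes a `lightgbm.model` file, since `lightgbm()` may write one as a side effect of training. As a minimal sketch, assuming the `fit` and `X` objects from the walkthrough, that file can instead be managed explicitly with the package's save/load helpers:

```r
library(lightgbm)

# Save the trained booster to a text file, then load it back
lgb.save(fit, filename = "lightgbm.model")
fit2 <- lgb.load(filename = "lightgbm.model")

# The reloaded booster should reproduce the original predictions
all.equal(predict(fit, X), predict(fit2, X))

# Same cleanup as in the vignette
file.remove("lightgbm.model")
```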