diff --git a/.ci/test.sh b/.ci/test.sh index 1b156eab7a71..3b9ee89ae3f5 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -78,6 +78,11 @@ if [[ $TASK == "if-else" ]]; then exit 0 fi +if [[ $TASK == "r-package" ]]; then + bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit -1 + exit 0 +fi + conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh new file mode 100755 index 000000000000..1570bdf021ec --- /dev/null +++ b/.ci/test_r_package.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# set up R environment +R_LIB_PATH=~/Rlib +mkdir -p $R_LIB_PATH +echo "R_LIBS=$R_LIB_PATH" > ${HOME}/.Renviron +echo 'options(repos = "https://cran.rstudio.com")' > ${HOME}/.Rprofile +export PATH="$R_LIB_PATH/R/bin:$PATH" + +# installing precompiled R for Ubuntu +# https://cran.r-project.org/bin/linux/ubuntu/#installation +# adding steps from https://stackoverflow.com/a/56378217/3986677 to get latest version +# +# This only needs to run on Travis because the R environment for Linux +# used by Azure Pipelines is set up in https://github.com/guolinke/lightgbm-ci-docker +if [[ $TRAVIS == "true" ]] && [[ $OS_NAME == "linux" ]]; then + sudo add-apt-repository \ + "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/" + sudo apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 + sudo apt-get update + sudo apt-get install \ + --no-install-recommends \ + -y \ + r-base-dev=${R_TRAVIS_LINUX_VERSION} \ + texinfo \ + texlive-latex-recommended \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + qpdf \ + || exit -1 +fi + +# Installing R precompiled for Mac OS 10.11 or higher +if [[ $OS_NAME == "macos" ]]; then + + brew install qpdf + brew cask install basictex + export PATH="/Library/TeX/texbin:$PATH" + sudo tlmgr update --self + sudo tlmgr install inconsolata 
helvetic + + wget -q https://cran.r-project.org/bin/macosx/R-${R_MAC_VERSION}.pkg -O R.pkg + sudo installer \ + -pkg $(pwd)/R.pkg \ + -target / + + # Fix "duplicate libomp versions" issue on Mac + # by replacing the R libomp.dylib with a symlink to the one installed with brew + if [[ $COMPILER == "clang" ]]; then + ver_arr=( ${R_MAC_VERSION//./ } ) + R_MAJOR_MINOR="${ver_arr[0]}.${ver_arr[1]}" + sudo ln -sf \ + "$(brew --cellar libomp)"/*/lib/libomp.dylib \ + /Library/Frameworks/R.framework/Versions/${R_MAJOR_MINOR}/Resources/lib/libomp.dylib + fi +fi + +conda install \ + -y \ + -q \ + --no-deps \ + pandoc + +# Manually install Depends and Imports libraries + 'testthat' +# to avoid a CI-time dependency on devtools (for devtools::install_deps()) +Rscript -e "install.packages(c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'))" || exit -1 + +cd ${BUILD_DIRECTORY} +Rscript build_r.R || exit -1 + +PKG_TARBALL="lightgbm_${LGB_VER}.tar.gz" +LOG_FILE_NAME="lightgbm.Rcheck/00check.log" + +# suppress R CMD check warning from Suggests dependencies not being available +export _R_CHECK_FORCE_SUGGESTS_=0 + +# fail the CI job if R CMD check reports any ERRORs or +# WARNINGs +R CMD check ${PKG_TARBALL} \ + --as-cran \ +|| exit -1 + +if grep -q -R "WARNING" "$LOG_FILE_NAME"; then + echo "WARNINGS have been found by R CMD check!" 
+ exit -1 +fi + +exit 0 diff --git a/.travis.yml b/.travis.yml index b2066e653a3f..2941a47abd38 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,7 @@ env: - TASK=mpi METHOD=pip - TASK=gpu METHOD=source PYTHON_VERSION=3.5 - TASK=gpu METHOD=pip PYTHON_VERSION=3.6 + - TASK=r-package matrix: exclude: @@ -44,9 +45,11 @@ before_install: - if [[ $TRAVIS_OS_NAME == "osx" ]]; then export OS_NAME="macos"; export COMPILER="gcc"; + export R_MAC_VERSION=3.6.1; else export OS_NAME="linux"; export COMPILER="clang"; + export R_TRAVIS_LINUX_VERSION=3.6.1-3bionic; fi - export CONDA="$HOME/miniconda" - export PATH="$CONDA/bin:$PATH" diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 9722d3e1518c..797a567a0213 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -42,6 +42,8 @@ jobs: TASK: gpu METHOD: source PYTHON_VERSION: 3.6 + r_package: + TASK: r-package steps: - script: | echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" @@ -84,6 +86,8 @@ jobs: PYTHON_VERSION: 3.5 bdist: TASK: bdist + r_package: + TASK: r-package steps: - script: | echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" @@ -95,6 +99,7 @@ jobs: echo "##vso[task.setvariable variable=CONDA]$CONDA" echo "##vso[task.prependpath]$CONDA/bin" echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_8_X64" + echo "##vso[task.setvariable variable=R_MAC_VERSION]3.6.1" displayName: 'Set variables' - bash: $(Build.SourcesDirectory)/.ci/setup.sh displayName: Setup diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index 92cef8e97c7b..309a82858cbc 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -12,3 +12,14 @@ # Code copied in at build time ^src/CMakeLists.txt$ + +# unnecessary files from submodules +^src/compute/.appveyor.yml$ +^src/compute/.coveralls.yml$ +^src/compute/.travis.yml$ +^src/compute/test/$ +^src/compute/index.html$ +^src/compute/.git$ +^src/compute/.gitignore$ +^src/compute/CONTRIBUTING.md$ +^src/compute/README.md$ diff --git 
a/R-package/DESCRIPTION b/R-package/DESCRIPTION index fcf994dbe39f..64386504ca72 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -36,4 +36,4 @@ Imports: jsonlite (>= 1.0), Matrix (>= 1.1-0), methods -RoxygenNote: 6.0.1 +RoxygenNote: 7.0.2 diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index a00c5632a21c..426488e3ca45 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -53,6 +53,8 @@ lgb.cv( \item{eval_freq}{evaluation output frequency, only effect when verbose > 0} +\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} + \item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified by the values of outcome labels.} @@ -88,8 +90,6 @@ into a predictor model which frees up memory and the original datasets} the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).} }} - -\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} } \value{ a trained model \code{lgb.CVBooster}. diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index 64ccd2041e1e..0b273dfd2e55 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -24,3 +24,42 @@ This is useful if you have a specific need for integer dataset instead of numeri Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. 
} +\examples{ +library(lightgbm) +data(iris) + +str(iris) + +new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter +str(new_iris$data) + +data(iris) # Erase iris dataset +iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA) + +# Use conversion using known rules +# Unknown factors become 0, excellent for sparse datasets +newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) + +# Unknown factor is now zero, perfect for sparse datasets +newer_iris$data[1L, ] # Species became 0 as it is an unknown factor + +newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value + +# Is the newly created dataset equal? YES! +all.equal(new_iris$data, newer_iris$data) + +# Can we test our own rules? +data(iris) # Erase iris dataset + +# We remapped values differently +personal_rules <- list( + Species = c( + "setosa" = 3L + , "versicolor" = 2L + , "virginica" = 1L + ) +) +newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) +str(newest_iris$data) # SUCCESS! + +} diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index b731ed60f706..b7aa4cb88ce6 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -99,6 +99,32 @@ if (!use_precompile) { # Install system(paste0(cmake_cmd, " ..")) + + # R CMD check complains about the .NOTPARALLEL directive created in the cmake + # Makefile. 
We don't need it here anyway since targets are built serially, so strip + # it from the generated Makefile before running the build + generated_makefile <- file.path( + R_PACKAGE_SOURCE + , "src" + , "build" + , "Makefile" + ) + if (file.exists(generated_makefile)) { + makefile_txt <- readLines( + con = generated_makefile + ) + makefile_txt <- gsub( + pattern = ".*NOTPARALLEL.*" + , replacement = "" + , x = makefile_txt + ) + writeLines( + text = makefile_txt + , con = generated_makefile + , sep = "\n" + ) + } + system(build_cmd) src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/") diff --git a/R-package/tests/testthat.R b/R-package/tests/testthat.R index 17f9e04cff8b..4a1d1268db36 100644 --- a/R-package/tests/testthat.R +++ b/R-package/tests/testthat.R @@ -1,4 +1,8 @@ library(testthat) library(lightgbm) -test_check("lightgbm") +test_check( + package = "lightgbm" + , stop_on_failure = TRUE + , stop_on_warning = FALSE +) diff --git a/build_r.R b/build_r.R index 74b025847366..88133c248071 100644 --- a/build_r.R +++ b/build_r.R @@ -85,5 +85,5 @@ version <- gsub( ) tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version)) -cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball) +cmd <- sprintf("R CMD INSTALL %s --no-multiarch --with-keep.source", tarball) .run_shell_command(cmd)