diff --git a/.appveyor.yml b/.appveyor.yml index f8cd85f17dcb..70764849c0de 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,4 +1,4 @@ -version: 2.3.2.{build} +version: 2.3.2.{build} image: Visual Studio 2015 platform: x64 @@ -7,6 +7,9 @@ configuration: # a trick to construct a build matrix with multiple Python versi environment: matrix: + - COMPILER: MINGW + TASK: r-package + R_WINDOWS_VERSION: 3.6.3 - COMPILER: MSVC TASK: python - COMPILER: MINGW diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 new file mode 100644 index 000000000000..5273e26809b4 --- /dev/null +++ b/.ci/test_r_package_windows.ps1 @@ -0,0 +1,94 @@ +# Download a file and retry upon failure. This looks like +# an infinite loop but CI-level timeouts will kill it +function Download-File-With-Retries { + param( + [string]$url, + [string]$destfile + ) + do { + Write-Output "Downloading '${url}'" + sleep 5; + (New-Object System.Net.WebClient).DownloadFile($url, $destfile) + } while(!$?); +} + +$env:R_LIB_PATH = "C:/RLibrary" +$env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH +$env:BINPREF = "C:/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/bin/" +$env:CRAN_MIRROR = "https://cloud.r-project.org/" + +cd $env:BUILD_SOURCESDIRECTORY +tzutil /s "GMT Standard Time" +[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH) + +if ($env:COMPILER -eq "MINGW") { + Write-Output "Telling R to use MinGW" + $install_libs = "$env:BUILD_SOURCESDIRECTORY\R-package\src\install.libs.R" + ((Get-Content -path $install_libs -Raw) -replace 'use_mingw <- FALSE','use_mingw <- TRUE') | Set-Content -Path $install_libs +} + +# set up R if it doesn't exist yet +if (!(Get-Command R.exe -errorAction SilentlyContinue)) { + + Write-Output "Downloading R and Rtools" + + # download R and RTools + Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" + Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/Rtools/Rtools35.exe" -destfile "Rtools.exe" + + # Install R + Write-Output "Installing R" + Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64" ; Check-Output $? + Write-Output "Done installing R" + + Write-Output "Installing Rtools" + Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/Rtools" ; Check-Output $? + Write-Output "Done installing Rtools" + + # download Miktex + Write-Output "Downloading MiKTeX" + Download-File-With-Retries -url "https://miktex.org/download/win/miktexsetup-x64.zip" -destfile "miktexsetup-x64.zip" + Add-Type -AssemblyName System.IO.Compression.FileSystem + [System.IO.Compression.ZipFile]::ExtractToDirectory("miktexsetup-x64.zip", "miktex") + Write-Output "Setting up MiKTeX" + .\miktex\miktexsetup.exe --local-package-repository=./miktex/download --package-set=essential --quiet download ; Check-Output $? + Write-Output "Installing MiKTeX" + .\miktex\download\miktexsetup.exe --portable="$env:R_LIB_PATH/miktex" --quiet install ; Check-Output $? + Write-Output "Done installing R, Rtools, and MiKTeX" +} + +initexmf --set-config-value [MPM]AutoInstall=1 +conda install -y --no-deps pandoc + +Add-Content .Renviron "R_LIBS=$env:R_LIB_PATH" + +Write-Output "Installing dependencies" +$packages = "c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" +Rscript --vanilla -e "install.packages($packages, repos = '$env:CRAN_MIRROR', pkgType = 'binary', lib = '$env:R_LIB_PATH', install.packages.check.source = 'no')" ; Check-Output $? + +Write-Output "Building R package" +Rscript build_r.R --skip-install ; Check-Output $? + +$PKG_FILE_NAME = Get-Item *.tar.gz +$LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + +$env:_R_CHECK_FORCE_SUGGESTS_=0 +if ($env:AZURE -eq "true") { + Write-Output "Running R CMD check without checking documentation" + R.exe CMD check --no-multiarch --no-manual --ignore-vignettes ${PKG_FILE_NAME} ; Check-Output $? +} else { + Write-Output "Running R CMD check as CRAN" + R.exe CMD check --no-multiarch --as-cran ${PKG_FILE_NAME} ; Check-Output $? +} + +Write-Output "R CMD check build logs:" +Get-Content -Path $env:BUILD_SOURCESDIRECTORY\lightgbm.Rcheck\00install.out + +Write-Output "Looking for issues with R CMD check results" +if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "WARNING" -Quiet) { + echo "WARNINGS have been found by R CMD check!" + Check-Output $False +} + +Write-Output "No issues were found checking the R package" +Exit 0 diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 1d6624fa344c..ec292b0b79fc 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -12,6 +12,11 @@ if (Test-Path env:APPVEYOR) { $env:BUILD_SOURCESDIRECTORY = $env:APPVEYOR_BUILD_FOLDER } +if ($env:TASK -eq "r-package") { + & $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $? + Exit 0 +} + # setup for Python conda init powershell conda activate diff --git a/.travis.yml b/.travis.yml index bbd72bb9b207..dde786e9dfbb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,11 +45,11 @@ before_install: - if [[ $TRAVIS_OS_NAME == "osx" ]]; then export OS_NAME="macos"; export COMPILER="gcc"; - export R_MAC_VERSION=3.6.1; + export R_MAC_VERSION=3.6.3; else export OS_NAME="linux"; export COMPILER="clang"; - export R_TRAVIS_LINUX_VERSION=3.6.1-3bionic; + export R_TRAVIS_LINUX_VERSION=3.6.3-1bionic; fi - export CONDA="$HOME/miniconda" - export PATH="$CONDA/bin:$PATH" diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 74d8d0ee4f82..81fbc1c38366 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -24,6 +24,8 @@ jobs: strategy: maxParallel: 6 matrix: + r_package: + TASK: r-package regular: TASK: regular sdist: @@ -42,8 +44,6 @@ jobs: TASK: gpu METHOD: source PYTHON_VERSION: 3.6 - r_package: - TASK: r-package steps: - script: | echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" @@ -78,6 +78,8 @@ jobs: strategy: maxParallel: 3 matrix: + r_package: + TASK: r-package regular: TASK: regular PYTHON_VERSION: 3.6 @@ -86,8 +88,6 @@ jobs: PYTHON_VERSION: 3.5 bdist: TASK: bdist - r_package: - TASK: r-package steps: - script: | echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" @@ -99,7 +99,7 @@ jobs: echo "##vso[task.setvariable variable=CONDA]$CONDA" echo "##vso[task.prependpath]$CONDA/bin" echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_8_X64" - echo "##vso[task.setvariable variable=R_MAC_VERSION]3.6.1" + echo "##vso[task.setvariable variable=R_MAC_VERSION]3.6.3" displayName: 'Set variables' - bash: $(Build.SourcesDirectory)/.ci/setup.sh displayName: Setup @@ -119,6 +119,9 @@ jobs: strategy: maxParallel: 3 matrix: + rpkg: + TASK: r-package + R_WINDOWS_VERSION: 3.6.3 regular: TASK: regular PYTHON_VERSION: 3.7 diff --git a/CMakeLists.txt b/CMakeLists.txt index e0734b0534ab..2ecd401831fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -316,7 +316,16 @@ if(WIN32 AND (MINGW OR CYGWIN)) endif() if(BUILD_FOR_R) + if(MSVC) + # https://docs.microsoft.com/en-us/cpp/build/reference/link-input-files?redirectedfrom=MSDN&view=vs-2019 + set_property( + TARGET _lightgbm + PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" + ) + TARGET_LINK_LIBRARIES(_lightgbm ${CMAKE_CURRENT_BINARY_DIR}/R.lib) + else() TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_CORE_LIBRARY}) + endif() endif(BUILD_FOR_R) install(TARGETS lightgbm _lightgbm diff --git a/R-package/src/cmake/modules/FindLibR.cmake b/R-package/src/cmake/modules/FindLibR.cmake index 20c0a974e1ff..7d3e9cf499ab 100644 --- a/R-package/src/cmake/modules/FindLibR.cmake +++ b/R-package/src/cmake/modules/FindLibR.cmake @@ -10,7 +10,6 @@ # LIBR_HOME # LIBR_EXECUTABLE # LIBR_INCLUDE_DIRS -# LIBR_LIB_DIR # LIBR_CORE_LIBRARY # and a CMake function to create R.lib for MSVC @@ -36,8 +35,8 @@ function(create_rlib_for_msvc) message(FATAL_ERROR "create_rlib_for_msvc() can only be used with MSVC") endif() - if(NOT EXISTS "${LIBR_LIB_DIR}") - message(FATAL_ERROR "LIBR_LIB_DIR, '${LIBR_LIB_DIR}', not found") + if(NOT EXISTS "${LIBR_CORE_LIBRARY}") + message(FATAL_ERROR "LIBR_CORE_LIBRARY, '${LIBR_CORE_LIBRARY}', not found") endif() find_program(GENDEF_EXE gendef) @@ -50,7 +49,7 @@ function(create_rlib_for_msvc) # extract symbols from R.dll into R.def and R.lib import library execute_process(COMMAND ${GENDEF_EXE} - "-" "${LIBR_LIB_DIR}/R.dll" + "-" "${LIBR_CORE_LIBRARY}" OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/R.def" ) execute_process(COMMAND ${DLLTOOL_EXE} @@ -168,23 +167,21 @@ execute_process( OUTPUT_VARIABLE LIBR_INCLUDE_DIRS ) -# ask R for the lib dir -execute_process( - COMMAND ${LIBR_EXECUTABLE} "--slave" "--vanilla" "-e" "cat(normalizePath(R.home('lib'), winslash='/'))" - OUTPUT_VARIABLE LIBR_LIB_DIR -) - -# look for the core R library -find_library( - LIBR_CORE_LIBRARY - NAMES R - HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_LIB_DIR}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}" -) - set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory") set(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH "R executable") set(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH "R include directory") -set(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH "R shared libraries directory") + +# look for the core R library +if(WIN32) + set(LIBR_CORE_LIBRARY ${LIBR_HOME}/bin/${R_ARCH}/R.dll) +else() + find_library( + LIBR_CORE_LIBRARY + NAMES R + HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_HOME}/lib" "${LIBR_HOME}/bin/${R_ARCH}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}" + ) +endif() + set(LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY} CACHE PATH "R core shared library") if(WIN32 AND MSVC) @@ -203,6 +200,5 @@ find_package_handle_standard_args(LibR DEFAULT_MSG LIBR_HOME LIBR_EXECUTABLE LIBR_INCLUDE_DIRS - LIBR_LIB_DIR LIBR_CORE_LIBRARY ) diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index a79c93d846b7..339e8417ee70 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -67,6 +67,7 @@ if (!use_precompile) { # Check if Windows installation (for gcc vs Visual Studio) if (WINDOWS) { if (use_mingw) { + print("Trying to build with MinGW") cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") build_cmd <- "mingw32-make.exe _lightgbm" system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index 55568e41f544..047215231fc6 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -257,13 +257,11 @@ class Tree { inline int NumericalDecision(double fval, int node) const { uint8_t missing_type = GetMissingType(decision_type_[node]); - if (std::isnan(fval)) { - if (missing_type != 2) { - fval = 0.0f; - } + if (std::isnan(fval) && missing_type != MissingType::NaN) { + fval = 0.0f; } - if ((missing_type == 1 && IsZero(fval)) - || (missing_type == 2 && std::isnan(fval))) { + if ((missing_type == MissingType::Zero && IsZero(fval)) + || (missing_type == MissingType::NaN && std::isnan(fval))) { if (GetDecisionType(decision_type_[node], kDefaultLeftMask)) { return left_child_[node]; } else { @@ -279,8 +277,8 @@ class Tree { inline int NumericalDecisionInner(uint32_t fval, int node, uint32_t default_bin, uint32_t max_bin) const { uint8_t missing_type = GetMissingType(decision_type_[node]); - if ((missing_type == 1 && fval == default_bin) - || (missing_type == 2 && fval == max_bin)) { + if ((missing_type == MissingType::Zero && fval == default_bin) + || (missing_type == MissingType::NaN && fval == max_bin)) { if (GetDecisionType(decision_type_[node], kDefaultLeftMask)) { return left_child_[node]; } else { @@ -301,7 +299,7 @@ class Tree { return right_child_[node];; } else if (std::isnan(fval)) { // NaN is always in the right - if (missing_type == 2) { + if (missing_type == MissingType::NaN) { return right_child_[node]; } int_fval = 0; diff --git a/src/io/json11.cpp b/src/io/json11.cpp index 9e1bb78c13c8..0be9bbdf3a6d 100644 --- a/src/io/json11.cpp +++ b/src/io/json11.cpp @@ -20,13 +20,12 @@ */ #include -#include -#ifndef LGB_R_BUILD - #include -#endif +#include + #include #include #include +#include namespace json11 { @@ -39,6 +38,8 @@ using std::make_shared; using std::initializer_list; using std::move; +using LightGBM::Log; + /* Helper for representing null - just a do-nothing struct, plus comparison * operators so the helpers in JsonValue work. We can't use nullptr_t because * it may not be orderable. @@ -626,9 +627,7 @@ struct JsonParser final { * the input and return res. If not, flag an error. */ Json expect(const string &expected, Json res) { - #ifndef LGB_R_BUILD - assert(i != 0); - #endif + CHECK_NE(i, 0) i--; if (str.compare(i, expected.length(), expected) == 0) { i += expected.length(); diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 5b5e24a2321c..4c8fb4eb0e20 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -57,13 +57,7 @@ int Tree::Split(int leaf, int feature, int real_feature, uint32_t threshold_bin, decision_type_[new_node_idx] = 0; SetDecisionType(&decision_type_[new_node_idx], false, kCategoricalMask); SetDecisionType(&decision_type_[new_node_idx], default_left, kDefaultLeftMask); - if (missing_type == MissingType::None) { - SetMissingType(&decision_type_[new_node_idx], 0); - } else if (missing_type == MissingType::Zero) { - SetMissingType(&decision_type_[new_node_idx], 1); - } else if (missing_type == MissingType::NaN) { - SetMissingType(&decision_type_[new_node_idx], 2); - } + SetMissingType(&decision_type_[new_node_idx], missing_type); threshold_in_bin_[new_node_idx] = threshold_bin; threshold_[new_node_idx] = threshold_double; ++num_leaves_; @@ -77,13 +71,7 @@ int Tree::SplitCategorical(int leaf, int feature, int real_feature, const uint32 int new_node_idx = num_leaves_ - 1; decision_type_[new_node_idx] = 0; SetDecisionType(&decision_type_[new_node_idx], true, kCategoricalMask); - if (missing_type == MissingType::None) { - SetMissingType(&decision_type_[new_node_idx], 0); - } else if (missing_type == MissingType::Zero) { - SetMissingType(&decision_type_[new_node_idx], 1); - } else if (missing_type == MissingType::NaN) { - SetMissingType(&decision_type_[new_node_idx], 2); - } + SetMissingType(&decision_type_[new_node_idx], missing_type); threshold_in_bin_[new_node_idx] = num_cat_; threshold_[new_node_idx] = num_cat_; ++num_cat_; @@ -316,9 +304,9 @@ std::string Tree::NodeToJSON(int index) const { str_buf << "\"default_left\":false," << '\n'; } uint8_t missing_type = GetMissingType(decision_type_[index]); - if (missing_type == 0) { + if (missing_type == MissingType::None) { str_buf << "\"missing_type\":\"None\"," << '\n'; - } else if (missing_type == 1) { + } else if (missing_type == MissingType::Zero) { str_buf << "\"missing_type\":\"Zero\"," << '\n'; } else { str_buf << "\"missing_type\":\"NaN\"," << '\n'; @@ -347,9 +335,10 @@ std::string Tree::NumericalDecisionIfElse(int node) const { std::stringstream str_buf; uint8_t missing_type = GetMissingType(decision_type_[node]); bool default_left = GetDecisionType(decision_type_[node], kDefaultLeftMask); - if (missing_type == 0 || (missing_type == 1 && default_left && kZeroThreshold < threshold_[node])) { + if (missing_type == MissingType::None + || (missing_type == MissingType::Zero && default_left && kZeroThreshold < threshold_[node])) { str_buf << "if (fval <= " << threshold_[node] << ") {"; - } else if (missing_type == 1) { + } else if (missing_type == MissingType::Zero) { if (default_left) { str_buf << "if (fval <= " << threshold_[node] << " || Tree::IsZero(fval)" << " || std::isnan(fval)) {"; } else { @@ -368,7 +357,7 @@ std::string Tree::NumericalDecisionIfElse(int node) const { std::string Tree::CategoricalDecisionIfElse(int node) const { uint8_t missing_type = GetMissingType(decision_type_[node]); std::stringstream str_buf; - if (missing_type == 2) { + if (missing_type == MissingType::NaN) { str_buf << "if (std::isnan(fval)) { int_fval = -1; } else { int_fval = static_cast(fval); }"; } else { str_buf << "if (std::isnan(fval)) { int_fval = 0; } else { int_fval = static_cast(fval); }";