From 899d074e15a01ccfb8871e38d8809de393f1cc84 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 14 May 2020 17:31:13 +0100 Subject: [PATCH] [R-package] speed up installation with Visual Studio (#2965) * fix conflicts * Update R-package/src/install.libs.R * empty commit --- .appveyor.yml | 3 + .ci/test_r_package_windows.ps1 | 75 ++++++++++------ .vsts-ci.yml | 2 +- R-package/DESCRIPTION | 1 + R-package/src/install.libs.R | 156 +++++++++++++++++++++++---------- build_r.R | 47 ++++++++-- 6 files changed, 203 insertions(+), 81 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 20807e36e6f5..2ad8f641e426 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -9,6 +9,9 @@ environment: matrix: - COMPILER: MINGW TASK: r-package + - COMPILER: MSVC + TASK: r-package + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 - COMPILER: MSVC TASK: python - COMPILER: MINGW diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 index 5bd65d28408b..966cc3dc0e91 100644 --- a/.ci/test_r_package_windows.ps1 +++ b/.ci/test_r_package_windows.ps1 @@ -14,6 +14,7 @@ function Download-File-With-Retries { $env:R_WINDOWS_VERSION = "3.6.3" $env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/' +$env:R_LIBS = "$env:R_LIB_PATH" $env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH $env:CRAN_MIRROR = "https://cloud.r-project.org/" $env:CTAN_MIRROR = "https://ctan.math.illinois.edu/systems/win32/miktex/tm/packages/" @@ -64,45 +65,65 @@ if ($env:COMPILER -eq "MINGW") { conda install -q -y --no-deps pandoc } -Add-Content .Renviron "R_LIBS=$env:R_LIB_PATH" - Write-Output "Installing dependencies" -$packages = "c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" +$packages = "c('data.table', 'jsonlite', 'Matrix', 'processx', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" Rscript --vanilla -e "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH')" ; Check-Output $? Write-Output "Building R package" -Rscript build_r.R --skip-install ; Check-Output $? -$PKG_FILE_NAME = Get-Item *.tar.gz -$LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" +# R CMD check is not used for MSVC builds +if ($env:COMPILER -ne "MSVC") { + Rscript build_r.R --skip-install ; Check-Output $? -$env:_R_CHECK_FORCE_SUGGESTS_ = 0 -if ($env:COMPILER -ne "MINGW") { - Write-Output "Running R CMD check without checking documentation" - R.exe CMD check --no-multiarch --no-examples --no-manual --ignore-vignettes ${PKG_FILE_NAME} ; $check_succeeded = $? -} else { + $PKG_FILE_NAME = Get-Item *.tar.gz + $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + + $env:_R_CHECK_FORCE_SUGGESTS_ = 0 Write-Output "Running R CMD check as CRAN" R.exe CMD check --no-multiarch --as-cran ${PKG_FILE_NAME} ; $check_succeeded = $? -} -Write-Output "R CMD check build logs:" -Get-Content -Path $env:BUILD_SOURCESDIRECTORY\lightgbm.Rcheck\00install.out - -Check-Output $check_succeeded + Write-Output "R CMD check build logs:" + $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\lightgbm.Rcheck\00install.out" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + + Check-Output $check_succeeded + + Write-Output "Looking for issues with R CMD check results" + if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "WARNING" -Quiet) { + echo "WARNINGS have been found by R CMD check!" + Check-Output $False + } + + $note_str = Get-Content "${LOG_FILE_NAME}" | Select-String -Pattern ' NOTE' | Out-String ; Check-Output $? + $relevant_line = $note_str -match '.*Status: (\d+) NOTE.*' + $NUM_CHECK_NOTES = $matches[1] + $ALLOWED_CHECK_NOTES = 3 + if ([int]$NUM_CHECK_NOTES -gt $ALLOWED_CHECK_NOTES) { + Write-Output "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed" + Check-Output $False + } +} else { + $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" + Rscript build_r.R *> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Write-Output "----- build and install logs -----" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "----- end of build and install logs -----" + Check-Output $install_succeeded +} -Write-Output "Looking for issues with R CMD check results" -if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "WARNING" -Quiet) { - echo "WARNINGS have been found by R CMD check!" - Check-Output $False +# Checking that we actually got the expected compiler. The R package has some logic +# to fail back to MinGW if MSVC fails, but for CI builds we need to check that the correct +# compiler was used. +$checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" +if ($checks.Matches.length -eq 0) { + Write-Output "The wrong compiler was used. Check the build logs." + Check-Output $False } -$note_str = Get-Content "${LOG_FILE_NAME}" | Select-String -Pattern ' NOTE' | Out-String ; Check-Output $? -$relevant_line = $note_str -match '.*Status: (\d+) NOTE.*' -$NUM_CHECK_NOTES = $matches[1] -$ALLOWED_CHECK_NOTES = 3 -if ([int]$NUM_CHECK_NOTES -gt $ALLOWED_CHECK_NOTES) { - Write-Output "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed" - Check-Output $False +if ($env:COMPILER -eq "MSVC") { + Write-Output "Running tests with testthat.R" + cd R-package/tests + Rscript testthat.R ; Check-Output $? } Write-Output "No issues were found checking the R package" diff --git a/.vsts-ci.yml b/.vsts-ci.yml index c72d3a2e2291..359ecc4389b9 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -120,7 +120,7 @@ jobs: matrix: r_package: TASK: r-package - COMPILER: MINGW + COMPILER: MSVC regular: TASK: regular PYTHON_VERSION: 3.6 diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 1924b6a1d666..9e2cf6b1e513 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -27,6 +27,7 @@ Biarch: false Suggests: ggplot2 (>= 1.0.1), knitr, + processx, rmarkdown, testthat Depends: diff --git a/R-package/src/install.libs.R b/R-package/src/install.libs.R index 339e8417ee70..b04eecc3317f 100644 --- a/R-package/src/install.libs.R +++ b/R-package/src/install.libs.R @@ -12,7 +12,77 @@ R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10.0 if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262" || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")) { - print("Warning: unmatched R_INTERNALS_UUID, may cannot run normally.") + warning("Warning: unmatched R_INTERNALS_UUID, may not run normally.") +} + +# system() will not raise an R exception if the process called +# fails. Wrapping it here to get that behavior. +# +# system() introduces a lot of overhead, at least on Windows, +# so trying processx if it is available +.run_shell_command <- function(cmd, args, strict = TRUE) { + on_windows <- .Platform$OS.type == "windows" + has_processx <- suppressMessages({ + suppressWarnings({ + require("processx") # nolint + }) + }) + if (has_processx && on_windows) { + result <- processx::run( + command = cmd + , args = args + , windows_verbatim_args = TRUE + , error_on_status = FALSE + , echo = TRUE + ) + exit_code <- result$status + } else { + if (on_windows) { + message(paste0( + "Using system() to run shell commands. Installing " + , "'processx' with install.packages('processx') might " + , "make this faster." + )) + } + cmd <- paste0(cmd, " ", paste0(args, collapse = " ")) + exit_code <- system(cmd) + } + + if (exit_code != 0L && isTRUE(strict)) { + stop(paste0("Command failed with exit code: ", exit_code)) + } + return(invisible(exit_code)) +} + +# try to generate Visual Studio build files +.generate_vs_makefiles <- function(cmake_args) { + vs_versions <- c( + "Visual Studio 16 2019" + , "Visual Studio 15 2017" + , "Visual Studio 14 2015" + ) + working_vs_version <- NULL + for (vs_version in vs_versions) { + message(sprintf("Trying '%s'", vs_version)) + # if the build directory is not empty, clean it + if (file.exists("CMakeCache.txt")) { + file.remove("CMakeCache.txt") + } + vs_cmake_args <- c( + cmake_args + , "-G" + , shQuote(vs_version) + , "-A" + , "x64" + ) + exit_code <- .run_shell_command("cmake", c(vs_cmake_args, ".."), strict = FALSE) + if (exit_code == 0L) { + message(sprintf("Successfully created build files for '%s'", vs_version)) + return(invisible(TRUE)) + } + + } + return(invisible(FALSE)) } # Move in CMakeLists.txt @@ -41,17 +111,18 @@ if (!use_precompile) { setwd(build_dir) # Prepare installation steps - cmake_cmd <- "cmake " - build_cmd <- "make _lightgbm" + cmake_args <- NULL + build_cmd <- "make" + build_args <- "_lightgbm" lib_folder <- file.path(source_dir, fsep = "/") if (use_gpu) { - cmake_cmd <- paste0(cmake_cmd, " -DUSE_GPU=ON ") + cmake_args <- c(cmake_args, "-DUSE_GPU=ON") } if (R_ver >= 3.5) { - cmake_cmd <- paste0(cmake_cmd, " -DUSE_R35=ON ") + cmake_args <- c(cmake_args, "-DUSE_R35=ON") } - cmake_cmd <- paste0(cmake_cmd, " -DBUILD_FOR_R=ON ") + cmake_args <- c(cmake_args, "-DBUILD_FOR_R=ON") # Pass in R version, used to help find R executable for linking R_version_string <- paste( @@ -59,53 +130,47 @@ if (!use_precompile) { , R.Version()[["minor"]] , sep = "." ) - cmake_cmd <- sprintf( - paste0(cmake_cmd, " -DCMAKE_R_VERSION='%s' ") - , R_version_string - ) + r_version_arg <- sprintf("-DCMAKE_R_VERSION='%s'", R_version_string) + cmake_args <- c(cmake_args, r_version_arg) + + # the checks below might already run `cmake -G`. If they do, set this flag + # to TRUE to avoid re-running it later + makefiles_already_generated <- FALSE # Check if Windows installation (for gcc vs Visual Studio) if (WINDOWS) { if (use_mingw) { - print("Trying to build with MinGW") - cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") - build_cmd <- "mingw32-make.exe _lightgbm" - system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools + message("Trying to build with MinGW") + # Must build twice for Windows due sh.exe in Rtools + cmake_args <- c(cmake_args, "-G", shQuote("MinGW Makefiles")) + .run_shell_command("cmake", c(cmake_args, ".."), strict = FALSE) + build_cmd <- "mingw32-make.exe" + build_args <- "_lightgbm" } else { - local_vs_def <- "" - vs_versions <- c( - "Visual Studio 16 2019" - , "Visual Studio 15 2017" - , "Visual Studio 14 2015" - ) - for (vs in vs_versions) { - print(paste0("Trying to build with: '", vs, "'")) - vs_def <- paste0(" -G \"", vs, "\" -A x64") - tmp_cmake_cmd <- paste0(cmake_cmd, vs_def) - try_vs <- system(paste0(tmp_cmake_cmd, " ..")) - if (try_vs == 0L) { - local_vs_def <- vs_def - print(paste0("Building with '", vs, "' succeeded")) - break - } else { - unlink("./*", recursive = TRUE) # Clean up build directory - } - } - if (try_vs == 1L) { - print("Building with Visual Studio failed. Attempted with MinGW") - cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") - system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools - build_cmd <- "mingw32-make.exe _lightgbm" + visual_studio_succeeded <- .generate_vs_makefiles(cmake_args) + if (!isTRUE(visual_studio_succeeded)) { + warning("Building with Visual Studio failed. Attempting with MinGW") + # Must build twice for Windows due sh.exe in Rtools + cmake_args <- c(cmake_args, "-G", shQuote("MinGW Makefiles")) + .run_shell_command("cmake", c(cmake_args, ".."), strict = FALSE) + build_cmd <- "mingw32-make.exe" + build_args <- "_lightgbm" } else { - cmake_cmd <- paste0(cmake_cmd, local_vs_def) - build_cmd <- "cmake --build . --target _lightgbm --config Release" + build_cmd <- "cmake" + build_args <- c("--build", ".", "--target", "_lightgbm", "--config", "Release") lib_folder <- file.path(source_dir, "Release", fsep = "/") + makefiles_already_generated <- TRUE } } + } else { + .run_shell_command("cmake", c(cmake_args, "..")) + makefiles_already_generated <- TRUE } - # Install - system(paste0(cmake_cmd, " ..")) + # generate build files + if (!makefiles_already_generated) { + .run_shell_command("cmake", c(cmake_args, "..")) + } # R CMD check complains about the .NOTPARALLEL directive created in the cmake # Makefile. We don't need it here anyway since targets are built serially, so trying @@ -130,7 +195,8 @@ if (!use_precompile) { ) } - system(build_cmd) + # build the library + .run_shell_command(build_cmd, build_args) src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/") } else { @@ -169,7 +235,7 @@ if (!use_precompile) { dest <- file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep = "/") dir.create(dest, recursive = TRUE, showWarnings = FALSE) if (file.exists(src)) { - print(paste0("Found library file: ", src, " to move to ", dest)) + message(paste0("Found library file: ", src, " to move to ", dest)) file.copy(src, dest, overwrite = TRUE) symbols_file <- file.path(source_dir, "symbols.rds") @@ -183,7 +249,7 @@ if (file.exists(src)) { # clean up the "build" directory if (dir.exists(build_dir)) { - print("Removing 'build/' directory") + message("Removing 'build/' directory") unlink( x = build_dir , recursive = TRUE diff --git a/build_r.R b/build_r.R index 29a4d3cc382d..886ad1ae90b4 100644 --- a/build_r.R +++ b/build_r.R @@ -17,12 +17,42 @@ INSTALL_AFTER_BUILD <- !("--skip-install" %in% args) } # system() will not raise an R exception if the process called -# fails. Wrapping it here to get that behavior -.run_shell_command <- function(cmd, ...) { - exit_code <- system(cmd, ...) - if (exit_code != 0L) { +# fails. Wrapping it here to get that behavior. +# +# system() introduces a lot of overhead, at least on Windows, +# so trying processx if it is available +.run_shell_command <- function(cmd, args, strict = TRUE) { + on_windows <- .Platform$OS.type == "windows" + has_processx <- suppressMessages({ + suppressWarnings({ + require("processx") # nolint + }) + }) + if (has_processx && on_windows) { + result <- processx::run( + command = cmd + , args = args + , windows_verbatim_args = TRUE + , error_on_status = FALSE + , echo = TRUE + ) + exit_code <- result$status + } else { + if (on_windows) { + message(paste0( + "Using system() to run shell commands. Installing " + , "'processx' with install.packages('processx') might " + , "make this faster." + )) + } + cmd <- paste0(cmd, " ", paste0(args, collapse = " ")) + exit_code <- system(cmd) + } + + if (exit_code != 0L && isTRUE(strict)) { stop(paste0("Command failed with exit code: ", exit_code)) } + return(invisible(exit_code)) } # Make a new temporary folder to work in @@ -73,8 +103,7 @@ result <- file.copy( # NOTE: --keep-empty-dirs is necessary to keep the deep paths expected # by CMake while also meeting the CRAN req to create object files # on demand -cmd <- "R CMD build lightgbm_r --keep-empty-dirs" -.run_shell_command(cmd) +.run_shell_command("R", c("CMD", "build", "lightgbm_r", "--keep-empty-dirs")) # Install the package version <- gsub( @@ -88,9 +117,11 @@ version <- gsub( ) tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version)) -cmd <- sprintf("R CMD INSTALL %s --no-multiarch --with-keep.source", tarball) +install_cmd <- "R" +install_args <- c("CMD", "INSTALL", "--no-multiarch", "--with-keep.source", tarball) if (INSTALL_AFTER_BUILD) { - .run_shell_command(cmd) + .run_shell_command(install_cmd, install_args) } else { + cmd <- paste0(install_cmd, " ", paste0(install_args, collapse = " ")) print(sprintf("Skipping installation. Install the package with command '%s'", cmd)) }