Skip to content

Commit

Permalink
merged changes from microsoft#2936
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Apr 26, 2020
2 parents 40fa72d + 2c18a0f commit 25cb78a
Show file tree
Hide file tree
Showing 85 changed files with 838 additions and 211 deletions.
2 changes: 2 additions & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ configuration: # a trick to construct a build matrix with multiple Python versi

environment:
matrix:
- COMPILER: MINGW
TASK: r-package
- COMPILER: MSVC
TASK: python
- COMPILER: MINGW
Expand Down
4 changes: 2 additions & 2 deletions .ci/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $AZURE == "true" ]] && [[ $TASK == "sdist" ]]; then
brew install https://raw.githubusercontent.com/Homebrew/homebrew-core/f3544543a3115023fc7ca962c21d14b443f419d0/Formula/swig.rb # swig 3.0.12
fi
wget -q -O conda.sh https://repo.continuum.io/miniconda/Miniconda${PYTHON_VERSION:0:1}-latest-MacOSX-x86_64.sh
wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
else # Linux
if [[ $TASK == "mpi" ]]; then
sudo apt-get update
Expand All @@ -37,7 +37,7 @@ else # Linux
echo libamdocl64.so > $OPENCL_VENDOR_PATH/amdocl64.icd
fi
if [[ $TRAVIS == "true" ]]; then
wget -q -O conda.sh https://repo.continuum.io/miniconda/Miniconda${PYTHON_VERSION:0:1}-latest-Linux-x86_64.sh
wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
fi
fi

Expand Down
2 changes: 1 addition & 1 deletion .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then
echo "Linting R code"
Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit -1
echo "Linting C++ code"
cpplint --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length --recursive ./src ./include || exit 0
cpplint --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length --recursive ./src ./include || exit -1
exit 0
fi

Expand Down
17 changes: 9 additions & 8 deletions .ci/test_r_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ fi

# Installing R precompiled for Mac OS 10.11 or higher
if [[ $OS_NAME == "macos" ]]; then

# temp fix for basictex
if [[ $AZURE == "true" ]]; then
brew update
fi
brew install qpdf
brew cask install basictex
export PATH="/Library/TeX/texbin:$PATH"
Expand Down Expand Up @@ -86,9 +81,17 @@ export _R_CHECK_FORCE_SUGGESTS_=0

# fails tests if either ERRORs or WARNINGs are thrown by
# R CMD CHECK
check_succeeded="yes"
R CMD check ${PKG_TARBALL} \
--as-cran \
|| exit -1
|| check_succeeded="no"

echo "R CMD check build logs:"
cat ${BUILD_DIRECTORY}/lightgbm.Rcheck/00install.out

if [[ $check_succeeded == "no" ]]; then
exit -1
fi

if grep -q -R "WARNING" "$LOG_FILE_NAME"; then
echo "WARNINGS have been found by R CMD check!"
Expand All @@ -105,5 +108,3 @@ if [[ ${NUM_CHECK_NOTES} -gt ${ALLOWED_CHECK_NOTES} ]]; then
echo "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed"
exit -1
fi

exit 0
108 changes: 108 additions & 0 deletions .ci/test_r_package_windows.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Download a file, retrying until it succeeds. There is deliberately no
# retry limit: this looks like an infinite loop, but CI-level timeouts
# will kill the job if the download never works.
#
# Parameters:
#   url      - address of the file to download
#   destfile - path the downloaded file is written to
function Download-File-With-Retries {
    param(
        [string]$url,
        [string]$destfile
    )
    $downloaded = $false
    do {
        Write-Output "Downloading ${url}"
        $client = New-Object System.Net.WebClient
        try {
            $client.DownloadFile($url, $destfile)
            $downloaded = $true
        } catch {
            # DownloadFile() reports failure by throwing. Catching it here keeps
            # the retry working even if errors are configured to stop the script.
            Write-Output "Download failed: $($_.Exception.Message)"
            # Pause only between attempts, so a successful first try is not delayed.
            Start-Sleep -Seconds 5
        } finally {
            # release the connection held by this attempt's client
            $client.Dispose()
        }
    } while (-not $downloaded);
}

$env:R_WINDOWS_VERSION = "3.6.3"
$env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/'
$env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH
$env:CRAN_MIRROR = "https://cloud.r-project.org/"
$env:CTAN_MIRROR = "https://ctan.math.illinois.edu/systems/win32/miktex/tm/packages/"

if ($env:COMPILER -eq "MINGW") {
$env:CXX = "$env:R_LIB_PATH/Rtools/mingw_64/bin/g++.exe"
$env:CC = "$env:R_LIB_PATH/Rtools/mingw_64/bin/gcc.exe"
}

cd $env:BUILD_SOURCESDIRECTORY
tzutil /s "GMT Standard Time"
[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH)

if ($env:COMPILER -eq "MINGW") {
Write-Output "Telling R to use MinGW"
$install_libs = "$env:BUILD_SOURCESDIRECTORY/R-package/src/install.libs.R"
((Get-Content -path $install_libs -Raw) -replace 'use_mingw <- FALSE','use_mingw <- TRUE') | Set-Content -Path $install_libs
}

# download R and RTools
Write-Output "Downloading R and Rtools"
Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe"
Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/Rtools/Rtools35.exe" -destfile "Rtools.exe"

# Install R
Write-Output "Installing R"
Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64" ; Check-Output $?
Write-Output "Done installing R"

Write-Output "Installing Rtools"
Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/Rtools" ; Check-Output $?
Write-Output "Done installing Rtools"

# MiKTeX and pandoc can be skipped on non-MINGW builds, since we don't
# build the package documentation for those
if ($env:COMPILER -eq "MINGW") {
Write-Output "Downloading MiKTeX"
Download-File-With-Retries -url "https://miktex.org/download/win/miktexsetup-x64.zip" -destfile "miktexsetup-x64.zip"
Add-Type -AssemblyName System.IO.Compression.FileSystem
[System.IO.Compression.ZipFile]::ExtractToDirectory("miktexsetup-x64.zip", "miktex")
Write-Output "Setting up MiKTeX"
.\miktex\miktexsetup.exe --remote-package-repository="$env:CTAN_MIRROR" --local-package-repository=./miktex/download --package-set=essential --quiet download ; Check-Output $?
Write-Output "Installing MiKTeX"
.\miktex\download\miktexsetup.exe --remote-package-repository="$env:CTAN_MIRROR" --portable="$env:R_LIB_PATH/miktex" --quiet install ; Check-Output $?
Write-Output "Done installing MiKTeX"

initexmf --set-config-value [MPM]AutoInstall=1
conda install -q -y --no-deps pandoc
}

Add-Content .Renviron "R_LIBS=$env:R_LIB_PATH"

Write-Output "Installing dependencies"
$packages = "c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')"
Rscript --vanilla -e "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH')" ; Check-Output $?

Write-Output "Building R package"
Rscript build_r.R --skip-install ; Check-Output $?

$PKG_FILE_NAME = Get-Item *.tar.gz
$LOG_FILE_NAME = "lightgbm.Rcheck/00check.log"

$env:_R_CHECK_FORCE_SUGGESTS_ = 0
if ($env:COMPILER -ne "MINGW") {
Write-Output "Running R CMD check without checking documentation"
R.exe CMD check --no-multiarch --no-examples --no-manual --ignore-vignettes ${PKG_FILE_NAME} ; $check_succeeded = $?
} else {
Write-Output "Running R CMD check as CRAN"
R.exe CMD check --no-multiarch --as-cran ${PKG_FILE_NAME} ; $check_succeeded = $?
}

Write-Output "R CMD check build logs:"
Get-Content -Path $env:BUILD_SOURCESDIRECTORY\lightgbm.Rcheck\00install.out

Check-Output $check_succeeded

# Scan the R CMD check log and fail the build if it contains any WARNING,
# or more NOTEs than are tolerated.
Write-Output "Looking for issues with R CMD check results"

# Select-String ignores case unless told otherwise. -CaseSensitive keeps an
# incidental lowercase "warning" in the log from failing the build.
if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "WARNING" -CaseSensitive -Quiet) {
    Write-Output "WARNINGS have been found by R CMD check!"
    Check-Output $False
}

$note_str = Get-Content "${LOG_FILE_NAME}" | Select-String -Pattern ' NOTE' | Out-String ; Check-Output $?

$ALLOWED_CHECK_NOTES = 3

# Default to zero: a log without a "Status: N NOTE" line has no NOTE count to
# read. $matches is only valid right after a successful -match, so it is read
# inside the guard rather than unconditionally.
$NUM_CHECK_NOTES = 0
if ($note_str -match '.*Status: (\d+) NOTE.*') {
    $NUM_CHECK_NOTES = [int]$matches[1]
}

if ($NUM_CHECK_NOTES -gt $ALLOWED_CHECK_NOTES) {
    Write-Output "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed"
    Check-Output $False
}

Write-Output "No issues were found checking the R package"
5 changes: 5 additions & 0 deletions .ci/test_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ if (Test-Path env:APPVEYOR) {
$env:BUILD_SOURCESDIRECTORY = $env:APPVEYOR_BUILD_FOLDER
}

if ($env:TASK -eq "r-package") {
& $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $?
Exit 0
}

# setup for Python
conda init powershell
conda activate
Expand Down
6 changes: 3 additions & 3 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
* @guolinke @StrikerRUS @jameslamb @Laurae2

# main C++ code
include/ @guolinke @chivee
src/ @guolinke @chivee
CmakeLists.txt @guolinke @chivee @Laurae2 @jameslamb @wxchan @henry0312 @StrikerRUS @huanzhang12
include/ @guolinke @chivee @btrotta
src/ @guolinke @chivee @btrotta
CmakeLists.txt @guolinke @chivee @Laurae2 @jameslamb @wxchan @henry0312 @StrikerRUS @huanzhang12 @btrotta

# R code
include/LightGBM/lightgbm_R.h @Laurae2 @jameslamb
Expand Down
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,14 @@ lightgbm_r/*
lightgbm*.tar.gz
lightgbm.Rcheck/

# Files created by R examples and tests
**/lgb-Dataset.data
**/lgb-model.rds
**/lgb.Dataset.data
**/model.rds
**/model.txt
**/lgb-model.txt

# Files from interactive R sessions
.Rproj.user
**/.Rhistory
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ formats:
python:
version: 3
install:
- requirements: docs/requirements.txt
- requirements: docs/requirements_rtd.txt
sphinx:
builder: html
configuration: docs/conf.py
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ os:
- linux
- osx
dist: bionic
osx_image: xcode11.3
osx_image: xcode11.4

env:
global: # default values
Expand Down
9 changes: 6 additions & 3 deletions .vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
vmImage: 'ubuntu-latest'
container: ubuntu1404
strategy:
maxParallel: 6
maxParallel: 7
matrix:
regular:
TASK: regular
Expand Down Expand Up @@ -76,7 +76,7 @@ jobs:
pool:
vmImage: 'macOS-10.14'
strategy:
maxParallel: 3
maxParallel: 4
matrix:
regular:
TASK: regular
Expand Down Expand Up @@ -117,8 +117,11 @@ jobs:
pool:
vmImage: 'vs2017-win2016'
strategy:
maxParallel: 3
maxParallel: 4
matrix:
r_package:
TASK: r-package
COMPILER: MINGW
regular:
TASK: regular
PYTHON_VERSION: 3.6
Expand Down
18 changes: 11 additions & 7 deletions R-package/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,26 @@ Title: Light Gradient Boosting Machine
Version: 2.3.2
Date: 2019-11-26
Authors@R: c(
person("Guolin", "Ke", email = "[email protected]", role = c("aut", "cre")),
person("Damien", "Soukhavong", email = "[email protected]", role = c("ctb")),
person("Yachen", "Yan", role = c("ctb")),
person("James", "Lamb", email="[email protected]", role = c("ctb"))
)
person("Guolin", "Ke", email = "[email protected]", role = c("aut", "cre")),
person("Damien", "Soukhavong", email = "[email protected]", role = c("ctb")),
person("Yachen", "Yan", role = c("ctb")),
person("James", "Lamb", email="[email protected]", role = c("ctb"))
)
Description: Tree based algorithms can be improved by introducing boosting frameworks. LightGBM is one such framework, and this package offers an R interface to work with it.
It is designed to be distributed and efficient with the following advantages:
1. Faster training speed and higher efficiency.
2. Lower memory usage.
3. Better accuracy.
4. Parallel learning supported.
5. Capable of handling large-scale data.
In recognition of these advantages, LightGBM has being widely-used in many winning solutions of machine learning competitions.
In recognition of these advantages, LightGBM has been widely-used in many winning solutions of machine learning competitions.
Comparison experiments on public datasets suggest that LightGBM can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. In addition, parallel experiments suggest that in certain circumstances, LightGBM can achieve a linear speed-up in training time by using multiple machines.
Encoding: UTF-8
License: MIT + file LICENSE
URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
NeedsCompilation: yes
Biarch: false
Suggests:
ggplot2 (>= 1.0.1),
knitr,
Expand All @@ -37,4 +39,6 @@ Imports:
Matrix (>= 1.1-0),
methods,
utils
RoxygenNote: 7.0.2
SystemRequirements:
C++11
RoxygenNote: 7.1.0
1 change: 1 addition & 0 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export(saveRDS.lgb.Booster)
export(setinfo)
export(slice)
import(methods)
importFrom(Matrix,Matrix)
importFrom(R6,R6Class)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
Expand Down
20 changes: 14 additions & 6 deletions R-package/R/callback.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# constants that control naming in lists
# Accessor for the list-element name "eval". Takes no arguments and
# always returns that single string.
.EVAL_KEY <- function() {
    "eval"
}
# Accessor for the list-element name "eval_err". Takes no arguments and
# always returns that single string.
.EVAL_ERR_KEY <- function() {
    "eval_err"
}

#' @importFrom R6 R6Class
CB_ENV <- R6::R6Class(
"lgb.cb_env",
Expand Down Expand Up @@ -216,8 +224,8 @@ cb.record.evaluation <- function() {

# Create dummy lists
env$model$record_evals[[data_name]][[name]] <- list()
env$model$record_evals[[data_name]][[name]]$eval <- list()
env$model$record_evals[[data_name]][[name]]$eval_err <- list()
env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- list()
env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- list()

}

Expand All @@ -238,12 +246,12 @@ cb.record.evaluation <- function() {
name <- eval_res$name

# Store evaluation data
env$model$record_evals[[data_name]][[name]]$eval <- c(
env$model$record_evals[[data_name]][[name]]$eval
env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- c(
env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]]
, eval_res$value
)
env$model$record_evals[[data_name]][[name]]$eval_err <- c(
env$model$record_evals[[data_name]][[name]]$eval_err
env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- c(
env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]]
, eval_err
)

Expand Down
Loading

0 comments on commit 25cb78a

Please sign in to comment.