diff --git a/.ci/conda-envs/ci-core.txt b/.ci/conda-envs/ci-core.txt index a0763580c7f3..46d20963ed98 100644 --- a/.ci/conda-envs/ci-core.txt +++ b/.ci/conda-envs/ci-core.txt @@ -18,7 +18,7 @@ # direct imports cffi>=1.16 -dask>=2023.5.0 +dask>=2023.5.0,<2024.12 joblib>=1.3.2 matplotlib-base>=3.7.3 numpy>=1.24.4 diff --git a/.ci/install-opencl.ps1 b/.ci/install-opencl.ps1 index 7e335fe13aa4..b69ed575f0fb 100644 --- a/.ci/install-opencl.ps1 +++ b/.ci/install-opencl.ps1 @@ -4,31 +4,37 @@ $installer = "AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe" Write-Output "Downloading OpenCL platform installer" $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed -Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +$params = @{ + OutFile = "$installer" + Uri = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +} +Invoke-WebRequest @params if (Test-Path "$installer") { - Write-Output "Successfully downloaded OpenCL platform installer" + Write-Output "Successfully downloaded OpenCL platform installer" } else { - Write-Output "Unable to download OpenCL platform installer" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 + Write-Output "Unable to download OpenCL platform installer" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } # Install OpenCL platform from installer executable Write-Output "Running OpenCL installer" -Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait } +Invoke-Command -ScriptBlock { + Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait +} $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors -if ($property -eq $null) { - Write-Output "Unable to install OpenCL CPU platform" - Write-Output "OpenCL installation log:" - Get-Content "opencl.log" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 +if ($null -eq $property) { + Write-Output "Unable to install OpenCL CPU platform" + Write-Output "OpenCL installation log:" + Get-Content "opencl.log" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } else { - Write-Output "Successfully installed OpenCL CPU platform" - Write-Output "Current OpenCL drivers:" - Write-Output $property + Write-Output "Successfully installed OpenCL CPU platform" + Write-Output "Current OpenCL drivers:" + Write-Output $property } diff --git a/.ci/lint-js.sh b/.ci/lint-js.sh new file mode 100644 index 000000000000..534f251620e2 --- /dev/null +++ b/.ci/lint-js.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e -E -u -o pipefail + +biome ci --config-path=./biome.json --diagnostic-level=info --error-on-warnings ./ diff --git a/.ci/lint-powershell.ps1 b/.ci/lint-powershell.ps1 new file mode 100644 index 000000000000..332a6e040319 --- /dev/null +++ b/.ci/lint-powershell.ps1 @@ -0,0 +1,56 @@ +$settings = @{ + Severity = @( + 'Information', + 'Warning', + 'Error' + ) + IncludeDefaultRules = $true + # Additional rules that are disabled by default + Rules = @{ + PSAvoidExclaimOperator = @{ + Enable = $true + } + PSAvoidLongLines = @{ + Enable = $true + MaximumLineLength = 120 + } + PSAvoidSemicolonsAsLineTerminators = @{ + Enable = $true + } + PSPlaceCloseBrace = @{ + Enable = $true + NoEmptyLineBefore = $true + IgnoreOneLineBlock = $true + NewLineAfter = $false + } + PSPlaceOpenBrace = @{ + Enable = $true + 
OnSameLine = $true + NewLineAfter = $true + IgnoreOneLineBlock = $true + } + PSUseConsistentIndentation = @{ + Enable = $true + IndentationSize = 4 + PipelineIndentation = 'IncreaseIndentationAfterEveryPipeline' + Kind = 'space' + } + PSUseConsistentWhitespace = @{ + Enable = $true + CheckInnerBrace = $true + CheckOpenBrace = $true + CheckOpenParen = $true + CheckOperator = $true + CheckSeparator = $true + CheckPipe = $true + CheckPipeForRedundantWhitespace = $true + CheckParameter = $true + IgnoreAssignmentOperatorInsideHashTable = $false + } + PSUseCorrectCasing = @{ + Enable = $true + } + } +} + +Invoke-ScriptAnalyzer -Path ./ -Recurse -EnableExit -Settings $settings diff --git a/.ci/lint-python.sh b/.ci/lint-python-bash.sh similarity index 100% rename from .ci/lint-python.sh rename to .ci/lint-python-bash.sh diff --git a/.ci/lint-r-code.R b/.ci/lint-r-code.R index 8de09c0ff1ac..9eae00aa5d49 100755 --- a/.ci/lint-r-code.R +++ b/.ci/lint-r-code.R @@ -1,4 +1,3 @@ - loadNamespace("lintr") args <- commandArgs( diff --git a/.ci/setup.sh b/.ci/setup.sh index e551b1683aef..30d564b2d5f4 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -142,7 +142,7 @@ else # Linux fi fi -if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then +if [[ "${TASK}" != "r-package" ]]; then if [[ $SETUP_CONDA != "false" ]]; then curl \ -sL \ diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 269695c51462..a3f524b60be7 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -1,16 +1,16 @@ # Download a file and retry upon failure. This looks like # an infinite loop but CI-level timeouts will kill it -function Download-File-With-Retries { - param( - [string]$url, - [string]$destfile - ) - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - do { - Write-Output "Downloading ${url}" - sleep 5; - Invoke-WebRequest -Uri $url -OutFile $destfile - } while(!$?); +function Get-File-With-Tenacity { + param( + [Parameter(Mandatory = $true)][string]$url, + [Parameter(Mandatory = $true)][string]$destfile + ) + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + do { + Write-Output "Downloading ${url}" + sleep 5 + Invoke-WebRequest -Uri $url -OutFile $destfile + } while (-not $?) } # External utilities like R.exe / Rscript.exe writing to stderr (even for harmless @@ -20,20 +20,23 @@ function Download-File-With-Retries { # Using standard PowerShell redirection does not work to avoid these errors. # This function uses R's built-in redirection mechanism, sink(). 
Any place where # this function is used is a command that writes harmless messages to stderr -function Run-R-Code-Redirect-Stderr { - param( - [string]$rcode - ) - $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" - Rscript --vanilla -e $decorated_code +function Invoke-R-Code-Redirect-Stderr { + param( + [Parameter(Mandatory = $true)][string]$rcode + ) + $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" + Rscript --vanilla -e $decorated_code } # Remove all items matching some pattern from PATH environment variable function Remove-From-Path { - param( - [string]$pattern_to_remove - ) - $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + [CmdletBinding(SupportsShouldProcess)] + param( + [Parameter(Mandatory = $true)][string]$pattern_to_remove + ) + if ($PSCmdlet.ShouldProcess($env:PATH, "Removing ${pattern_to_remove}")) { + $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + } } # remove some details that exist in the GitHub Actions images which might @@ -72,33 +75,39 @@ Remove-Item C:\rtools43 -Force -Recurse -ErrorAction Ignore # * some paths and file names are different on R4.0 $env:R_MAJOR_VERSION = $env:R_VERSION.split('.')[0] if ($env:R_MAJOR_VERSION -eq "3") { - # Rtools 3.x has to be installed at C:\Rtools\ - # * https://stackoverflow.com/a/46619260/3986677 - $RTOOLS_INSTALL_PATH = "C:\Rtools" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" - $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" - $env:R_WINDOWS_VERSION = "3.6.3" + # Rtools 3.x has to be installed at C:\Rtools\ + # * https://stackoverflow.com/a/46619260/3986677 + $RTOOLS_INSTALL_PATH = "C:\Rtools" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" + $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" + $env:R_WINDOWS_VERSION = "3.6.3" } elseif ($env:R_MAJOR_VERSION -eq "4") { - $RTOOLS_INSTALL_PATH = "C:\rtools43" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" - $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" - $env:R_WINDOWS_VERSION = "4.3.1" + $RTOOLS_INSTALL_PATH = "C:\rtools43" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" + $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" + $env:R_WINDOWS_VERSION = "4.3.1" } else { - Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" - Check-Output $false + Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" + Assert-Output $false } $env:CMAKE_VERSION = "3.30.0" $env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/' $env:R_LIBS = "$env:R_LIB_PATH" $env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation" -$env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH +$env:PATH = @( + "$env:RTOOLS_BIN", + "$env:RTOOLS_MINGW_BIN", + "$env:R_LIB_PATH/R/bin/x64", + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin", + "$env:PATH" +) -join ";" if ([version]$env:R_VERSION -lt [version]"4.0") { - $env:CRAN_MIRROR = "https://cran-archive.r-project.org" + $env:CRAN_MIRROR = "https://cran-archive.r-project.org" } else { - 
$env:CRAN_MIRROR = "https://cran.rstudio.com" + $env:CRAN_MIRROR = "https://cran.rstudio.com" } $env:MIKTEX_EXCEPTION_PATH = "$env:TEMP\miktex" @@ -109,198 +118,244 @@ if ($env:R_BUILD_TYPE -ne "cran") { } if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" - $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" + $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" + $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location "$env:BUILD_SOURCESDIRECTORY" tzutil /s "GMT Standard Time" -[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH) -[Void][System.IO.Directory]::CreateDirectory($env:CMAKE_PATH) +[Void][System.IO.Directory]::CreateDirectory("$env:R_LIB_PATH") +[Void][System.IO.Directory]::CreateDirectory("$env:CMAKE_PATH") # download R, RTools and CMake Write-Output "Downloading R, Rtools and CMake" -Download-File-With-Retries -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" -Download-File-With-Retries -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" -Download-File-With-Retries -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" +$params = @{ + url = "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" + destfile = "R-win.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" + destfile = "Rtools.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/Kitware/CMake/releases/download/v{0}/cmake-{0}-windows-x86_64.zip" -f $env:CMAKE_VERSION + destfile = "$env:CMAKE_PATH/cmake.zip" +} +Get-File-With-Tenacity @params # Install R Write-Output "Installing R" -Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Check-Output $? +$params = @{ + FilePath = "R-win.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" +} +Start-Process @params ; Assert-Output $? Write-Output "Done installing R" Write-Output "Installing Rtools" -Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Check-Output $? +$params = @{ + FilePath = "Rtools.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" +} +Start-Process @params; Assert-Output $? Write-Output "Done installing Rtools" Write-Output "Installing CMake" Add-Type -AssemblyName System.IO.Compression.FileSystem -[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Check-Output $? -# Remove old CMake shiped with RTools +[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $? 
+# Remove old CMake shipped with RTools Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" Write-Output "Installing dependencies" -$packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" -Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $? +$packages = -join @( + "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ", + "dependencies = c('Imports', 'Depends', 'LinkingTo')" +) +$params = -join @( + "options(install.packages.check.source = 'no'); ", + "install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', ", + "lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" +) +Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? Write-Output "Building R-package" # R CMD check is not used for MSVC builds if ($env:COMPILER -ne "MSVC") { - $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" - $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" - - if ($env:R_BUILD_TYPE -eq "cmake") { - if ($env:TOOLCHAIN -eq "MINGW") { - Write-Output "Telling R to use MinGW" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSYS") { - Write-Output "Telling R to use MSYS" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSVC") { - $env:BUILD_R_FLAGS = "'--skip-install'" - } else { - Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" - Check-Output $false + $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" + $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + + if ($env:R_BUILD_TYPE -eq "cmake") { + if ($env:TOOLCHAIN -eq "MINGW") { + Write-Output "Telling R to use MinGW" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSYS") { + Write-Output "Telling R to use MSYS" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSVC") { + $env:BUILD_R_FLAGS = "'--skip-install'" + } else { + Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" + Assert-Output $false + } + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')" + Assert-Output $? + } elseif ($env:R_BUILD_TYPE -eq "cran") { + # NOTE: gzip and tar are needed to create a CRAN package on Windows, but + # some flavors of tar.exe can fail in some settings on Windows. + # Putting the msys64 utilities at the beginning of PATH temporarily to be + # sure they're used for that purpose. + if ($env:R_MAJOR_VERSION -eq "3") { + $env:PATH = @("C:\msys64\usr\bin", "$env:PATH") -join ";" + } + $params = -join @( + "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? + Remove-From-Path ".*msys64.*" + # Test CRAN source .tar.gz in a directory that is not this repo or below it. + # When people install.packages('lightgbm'), they won't have the LightGBM + # git repo around. 
This is to protect against the use of relative paths + # like ../../CMakeLists.txt that would only work if you are in the repoo + $R_CMD_CHECK_DIR = "tmp-r-cmd-check" + New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null + Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null + Set-Location "C:\$R_CMD_CHECK_DIR\" } - Run-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Check-Output $? - } elseif ($env:R_BUILD_TYPE -eq "cran") { - # NOTE: gzip and tar are needed to create a CRAN package on Windows, but - # some flavors of tar.exe can fail in some settings on Windows. - # Putting the msys64 utilities at the beginning of PATH temporarily to be - # sure they're used for that purpose. - if ($env:R_MAJOR_VERSION -eq "3") { - $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + + Write-Output "Running R CMD check" + if ($env:R_BUILD_TYPE -eq "cran") { + # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) + $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" + } else { + $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Check-Output $? - Remove-From-Path ".*msys64.*" - # Test CRAN source .tar.gz in a directory that is not this repo or below it. - # When people install.packages('lightgbm'), they won't have the LightGBM - # git repo around. This is to protect against the use of relative paths - # like ../../CMakeLists.txt that would only work if you are in the repoo - $R_CMD_CHECK_DIR = "tmp-r-cmd-check" - New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null - Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null - cd "C:\$R_CMD_CHECK_DIR\" - } - - Write-Output "Running R CMD check" - if ($env:R_BUILD_TYPE -eq "cran") { - # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) - $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } else { - $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? - - Write-Output "R CMD check build logs:" - $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" - - Check-Output $check_succeeded - - Write-Output "Looking for issues with R CMD check results" - if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { - echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" - Check-Output $False - } + $params = -join ( + "result <- processx::run(command = 'R.exe', args = $check_args, ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; $check_succeeded = $? 
+ + Write-Output "R CMD check build logs:" + $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Assert-Output $check_succeeded + + Write-Output "Looking for issues with R CMD check results" + if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { + Write-Output "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" + Assert-Output $False + } } else { - $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" - Run-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? - Write-Output "----- build and install logs -----" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Write-Output "----- end of build and install logs -----" - Check-Output $install_succeeded - # some errors are not raised above, but can be found in the logs - if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { - echo "ERRORs have been found installing lightgbm" - Check-Output $False - } + $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" + Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Write-Output "----- build and install logs -----" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "----- end of build and install logs -----" + Assert-Output $install_succeeded + # some errors are not raised above, but can be found in the logs + if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { + Write-Output "ERRORs have been found installing lightgbm" + Assert-Output $False + } } # Checking that the correct R version was used if ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" + $checks_cnt = $checks.Matches.length } else { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checksParams = @{ + Path = "${INSTALL_LOG_FILE_NAME}" + Pattern = "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + } + $checks = Select-String @checksParams + $checks_cnt = $checks.Matches.length } if ($checks_cnt -eq 0) { - Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." - Check-Output $False + Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." + Assert-Output $False } # Checking that we actually got the expected compiler. The R-package has some logic # to fail back to MinGW if MSVC fails, but for CI builds we need to check that the correct # compiler was used. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong compiler was used. Check the build logs." - Check-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong compiler was used. Check the build logs." + Assert-Output $False + } } # Checking that we got the right toolchain for MinGW. 
If using MinGW, both # MinGW and MSYS toolchains are supported if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong toolchain was used. Check the build logs." - Check-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong toolchain was used. Check the build logs." + Assert-Output $False + } } # Checking that MM_PREFETCH preprocessor definition is actually used in CI builds. if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that MM_MALLOC preprocessor definition is actually used in CI builds. if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that OpenMP is actually used in CMake builds. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" - if ($checks.Matches.length -eq 0) { - Write-Output "OpenMP wasn't found. Check the build logs." - Check-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" + if ($checks.Matches.length -eq 0) { + Write-Output "OpenMP wasn't found. Check the build logs." 
+ Assert-Output $False + } } if ($env:COMPILER -eq "MSVC") { - Write-Output "Running tests with testthat.R" - cd R-package/tests - # NOTE: using Rscript.exe intentionally here, instead of Run-R-Code-Redirect-Stderr, - # because something about the interaction between Run-R-Code-Redirect-Stderr - # and testthat results in failing tests not exiting with a non-0 exit code. - Rscript.exe --vanilla "testthat.R" ; Check-Output $? + Write-Output "Running tests with testthat.R" + Set-Location R-package/tests + # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, + # because something about the interaction between Invoke-R-Code-Redirect-Stderr + # and testthat results in failing tests not exiting with a non-0 exit code. + Rscript.exe --vanilla "testthat.R" ; Assert-Output $? } Write-Output "No issues were found checking the R-package" diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index 7d821676bb71..2e414ec0d282 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -20,12 +20,7 @@ fi # Get details needed for installing R components R_MAJOR_VERSION="${R_VERSION%.*}" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - export R_MAC_VERSION=3.6.3 - export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/R-${R_MAC_VERSION}.nn.pkg - export R_LINUX_VERSION="3.6.3-1bionic" - export R_APT_REPO="bionic-cran35/" -elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then +if [[ "${R_MAJOR_VERSION}" == "4" ]]; then export R_MAC_VERSION=4.3.1 export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-${ARCH}/base/R-${R_MAC_VERSION}-${ARCH}.pkg export R_LINUX_VERSION="4.3.1-1.2204.0" @@ -108,16 +103,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6 -# "Warning: dependency ‘lattice’ is not available" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')" -else - # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. - # This should be unnecessary on R >=4.4.0 - # ref: https://github.com/microsoft/LightGBM/issues/6433 - Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" -fi +# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. 
+# This should be unnecessary on R >=4.4.0 +# ref: https://github.com/microsoft/LightGBM/issues/6433 +Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" # manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0 # ref: https://github.com/microsoft/LightGBM/issues/6433 @@ -125,12 +114,7 @@ Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/A # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) -# NOTE: testthat is not required when running rchk -if [[ "${TASK}" == "r-rchk" ]]; then - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl')" -else - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" -fi +packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then packages+=", type = 'binary'" @@ -166,28 +150,6 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then ./build-cran-package.sh || exit 1 - if [[ "${TASK}" == "r-rchk" ]]; then - echo "Checking R-package with rchk" - mkdir -p packages - cp "${PKG_TARBALL}" packages - RCHK_LOG_FILE="rchk-logs.txt" - docker run \ - -v "$(pwd)/packages:/rchk/packages" \ - kalibera/rchk:latest \ - "/rchk/packages/${PKG_TARBALL}" \ - > "${RCHK_LOG_FILE}" 2>&1 \ - || (cat ${RCHK_LOG_FILE} && exit 1) - cat ${RCHK_LOG_FILE} - - # the exceptions below are from R itself and not LightGBM: - # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 - exit "$( - grep "${RCHK_LOG_FILE}" -v "in function strptime_internal" \ - | grep -v "in function RunGenCollect" \ - | grep --count -E '\[PB\]|ERROR' - )" - fi - # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM # git repo around. This is to protect against the use of relative paths diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index a2c498531262..264c13961aff 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -1,9 +1,9 @@ -function Check-Output { - param( [bool]$success ) - if (!$success) { - $host.SetShouldExit(-1) - exit 1 - } +function Assert-Output { + param( [Parameter(Mandatory = $true)][bool]$success ) + if (-not $success) { + $host.SetShouldExit(-1) + exit 1 + } } $env:CONDA_ENV = "test-env" @@ -17,41 +17,49 @@ Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore [Void][System.IO.Directory]::CreateDirectory($env:TMPDIR) if ($env:TASK -eq "r-package") { - & .\.ci\test-r-package-windows.ps1 ; Check-Output $? - Exit 0 + & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? + Exit 0 } if ($env:TASK -eq "cpp-tests") { - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 - cmake --build build --target testlightgbm --config Debug ; Check-Output $? - .\Debug\testlightgbm.exe ; Check-Output $? - Exit 0 + cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 + cmake --build build --target testlightgbm --config Debug ; Assert-Output $? + .\Debug\testlightgbm.exe ; Assert-Output $? 
+ Exit 0 } if ($env:TASK -eq "swig") { - $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" - Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Check-Output $? - $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" - $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH - $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" - cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? - Write-Output "CMake build logs:" - Get-Content -Path "$BuildLogFileName" - Check-Output $build_succeeded - $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" - $checks_cnt = $checks.Matches.length - if ($checks_cnt -eq 0) { - Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." - Check-Output $False - } - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? - if ($env:AZURE -eq "true") { - cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $? - } - Exit 0 + $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + $params = @{ + Uri = "https://sourceforge.net/projects/swig/files/latest/download" + OutFile = "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip" + UserAgent = "curl" + } + Invoke-WebRequest @params + Add-Type -AssemblyName System.IO.Compression.FileSystem + [System.IO.Compression.ZipFile]::ExtractToDirectory( + "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", + "$env:BUILD_SOURCESDIRECTORY/swig" + ) ; Assert-Output $? + $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" + $env:PATH = @("$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder", "$env:PATH") -join ";" + $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" + cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? + Write-Output "CMake build logs:" + Get-Content -Path "$BuildLogFileName" + Assert-Output $build_succeeded + $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" + $checks_cnt = $checks.Matches.length + if ($checks_cnt -eq 0) { + Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." + Assert-Output $False + } + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + if ($env:AZURE -eq "true") { + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? 
+ } + Exit 0 } # setup for Python @@ -61,82 +69,97 @@ conda config --set always_yes yes --set changeps1 no conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]" if ($env:PYTHON_VERSION -eq "3.7") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" } elseif ($env:PYTHON_VERSION -eq "3.8") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" } else { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } -conda create ` - -y ` - -n $env:CONDA_ENV ` - --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Check-Output $? +$condaParams = @( + "-y", + "-n", "$env:CONDA_ENV", + "--file", "$env:CONDA_REQUIREMENT_FILE", + "python=$env:PYTHON_VERSION[build=*cpython]" +) +conda create @condaParams ; Assert-Output $? if ($env:TASK -ne "bdist") { - conda activate $env:CONDA_ENV + conda activate $env:CONDA_ENV } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location "$env:BUILD_SOURCESDIRECTORY" if ($env:TASK -eq "regular") { - cmake -B build -S . -A x64 ; Check-Output $? - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? - sh ./build-python.sh install --precompile ; Check-Output $? - cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY -} -elseif ($env:TASK -eq "sdist") { - sh ./build-python.sh sdist ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $? -} -elseif ($env:TASK -eq "bdist") { - # Import the Chocolatey profile module so that the RefreshEnv command - # invoked below properly updates the current PowerShell session environment. - $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Import-Module "$module" ; Check-Output $? - RefreshEnv + cmake -B build -S . -A x64 ; Assert-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + sh ./build-python.sh install --precompile ; Assert-Output $? + cp ./Release/lib_lightgbm.dll "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" + cp ./Release/lightgbm.exe "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" +} elseif ($env:TASK -eq "sdist") { + sh ./build-python.sh sdist ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + Set-Location dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? +} elseif ($env:TASK -eq "bdist") { + # Import the Chocolatey profile module so that the RefreshEnv command + # invoked below properly updates the current PowerShell session environment. + $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + Import-Module "$module" ; Assert-Output $? + RefreshEnv - Write-Output "Current OpenCL drivers:" - Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors + Write-Output "Current OpenCL drivers:" + Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors - conda activate $env:CONDA_ENV - sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? 
- cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? - cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY + conda activate $env:CONDA_ENV + sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + Set-Location dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? + cp @(Get-ChildItem *py3-none-win_amd64.whl) "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { - if ($env:COMPILER -eq "MINGW") { - sh ./build-python.sh install --mingw ; Check-Output $? - } else { - sh ./build-python.sh install; Check-Output $? - } + if ($env:COMPILER -eq "MINGW") { + sh ./build-python.sh install --mingw ; Assert-Output $? + } else { + sh ./build-python.sh install; Assert-Output $? + } } if (($env:TASK -eq "sdist") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - # cannot test C API with "sdist" task - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" + # cannot test C API with "sdist" task + $tests = "$env:BUILD_SOURCESDIRECTORY/tests/python_package_test" } else { - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" + $tests = "$env:BUILD_SOURCESDIRECTORY/tests" } if ($env:TASK -eq "bdist") { - # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py - $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" + # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py + $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" } -pytest $tests ; Check-Output $? +pytest $tests ; Assert-Output $? if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide - @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode - conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" - foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file ; Check-Output $? - } # run all examples - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks - (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Check-Output $? 
# run all notebooks + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide" + @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" + # Prevent interactive window mode + (Get-Content "plot_example.py").replace( + 'graph.render(view=True)', + 'graph.render(view=False)' + ) | Set-Content "plot_example.py" + conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" + # Run all examples + foreach ($file in @(Get-ChildItem *.py)) { + @( + "import sys, warnings", + -join @( + "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: ", + "sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))" + ) + ) + (Get-Content $file) | Set-Content $file + python $file ; Assert-Output $? + } + # Run all notebooks + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks" + (Get-Content "interactive_plot_example.ipynb").replace( + 'INTERACTIVE = False', + 'assert False, \"Interactive mode disabled\"' + ) | Set-Content "interactive_plot_example.ipynb" + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? } diff --git a/.ci/test.sh b/.ci/test.sh index 4bf44140dbfd..82c159064a33 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -42,7 +42,7 @@ else export MACOSX_DEPLOYMENT_TARGET=12.0 fi -if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then +if [[ "${TASK}" == "r-package" ]]; then bash "${BUILD_DIRECTORY}/.ci/test-r-package.sh" || exit 1 exit 0 fi @@ -98,8 +98,12 @@ if [[ $TASK == "swig" ]]; then fi if [[ $TASK == "lint" ]]; then + pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" + echo "Linting PowerShell code" + pwsh -file ./.ci/lint-powershell.ps1 || exit 1 conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ + 'biome>=1.9.3' \ 'cmakelint>=1.4.3' \ 'cpplint>=1.6.0' \ 'matplotlib-base>=3.9.1' \ @@ -110,12 +114,14 @@ if [[ $TASK == "lint" ]]; then 'r-lintr>=3.1.2' # shellcheck disable=SC1091 source activate "${CONDA_ENV}" - echo "Linting Python code" - bash ./.ci/lint-python.sh || exit 1 + echo "Linting Python and bash code" + bash ./.ci/lint-python-bash.sh || exit 1 echo "Linting R code" Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1 echo "Linting C++ code" bash ./.ci/lint-cpp.sh || exit 1 + echo "Linting JavaScript code" + bash ./.ci/lint-js.sh || exit 1 exit 0 fi @@ -140,8 +146,8 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then make -C docs html || exit 1 if [[ $TASK == "check-links" ]]; then # check docs for broken links - pip install linkchecker - linkchecker --config=.linkcheckerrc ./docs/_build/html/*.html || exit 1 + pip install 'linkchecker>=10.5.0' + linkchecker --config=./docs/.linkcheckerrc ./docs/_build/html/*.html || exit 1 exit 0 fi # check the consistency of parameters' descriptions and other stuff diff --git a/.editorconfig b/.editorconfig index f7bd94f4f905..e7191b63c1d3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,22 +1,19 @@ root = true [*] -charset=utf-8 +charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true +end_of_line = lf indent_style = space indent_size = 2 -[*.{py,sh,js}] +[*.{py,sh,ps1,js,json}] indent_size = 4 -line_length = 120 +max_line_length = 120 skip = external_libs known_first_party = lightgbm -# Placeholder files -[{*.gitkeep,__init__.py}] -insert_final_newline = none - # Tabs matter for Makefile and 
.gitmodules [{makefile*,Makefile*,*.mk,*.mak,*.makefile,*.Makefile,GNUmakefile,BSDmakefile,make.bat,Makevars*,*.gitmodules}] indent_style = tab diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml index 4efe658b7f45..195fd5f1c8f1 100644 --- a/.github/workflows/lock.yml +++ b/.github/workflows/lock.yml @@ -39,7 +39,7 @@ jobs: This pull request has been automatically locked since there has not been any recent activity since it was closed. To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues including a reference to this. - # what shoulld the locking status be? + # what should the locking status be? issue-lock-reason: 'resolved' pr-lock-reason: 'resolved' process-only: 'issues, prs' diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 1758583ad8e4..66e05a18ba1f 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -14,10 +14,6 @@ concurrency: cancel-in-progress: true env: - # https://github.com/actions/checkout/issues/1590#issuecomment-2207052044 - # - # this could be removed (hopefully) when R 3.6 support is removed - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # in CMake-driven builds, parallelize compilation CMAKE_BUILD_PARALLEL_LEVEL: 4 # on Debian-based images, avoid interactive prompts @@ -48,12 +44,6 @@ jobs: ################ # CMake builds # ################ - - os: ubuntu-latest - task: r-package - compiler: gcc - r_version: 3.6 - build_type: cmake - container: 'ubuntu:18.04' - os: ubuntu-latest task: r-package compiler: gcc @@ -174,19 +164,12 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true - name: Install pandoc uses: r-lib/actions/setup-pandoc@v2 - if: matrix.container != 'ubuntu:18.04' - # R 3.6 binary isn't easily available on Ubuntu 18.04, - # but setup-pandoc>=2.7.1 is uses a too-new glibc for it. 
- # ref: https://github.com/microsoft/LightGBM/issues/6298 - - name: Install pandoc - uses: r-lib/actions/setup-pandoc@v2.6.0 - if: matrix.container == 'ubuntu:18.04' - name: Install tinytex if: startsWith(matrix.os, 'windows') uses: r-lib/actions/setup-tinytex@v2 @@ -274,6 +257,7 @@ jobs: - clang19 - gcc14 - intel + - rchk runs-on: ubuntu-latest container: ghcr.io/r-hub/containers/${{ matrix.image }}:latest steps: @@ -311,8 +295,32 @@ jobs: - name: Install packages and run tests shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh + + # 'rchk' isn't run through 'R CMD check', use the approach documented at + # https://r-hub.github.io/containers/local.html + if [[ "${{ matrix.image }}" =~ "rchk" ]]; then + r-check "$(pwd)" \ + | tee ./rchk-logs.txt 2>&1 + + # the '-v' exceptions below are from R/rchk itself and not LightGBM: + # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 + if grep -E '\[PB\]|ERROR' ./rchk-logs.txt \ + | grep -v 'too many states' \ + > /dev/null; \ + then + echo "rchk found issues" + exit 1 + else + echo "rchk did not find any issues" + exit 0 + fi + fi + + # 'testthat' is not needed by 'rchk', so avoid installing it until here + Rscript -e "install.packages('testthat', repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + if [[ "${{ matrix.image }}" =~ "clang" ]]; then # allowing the following NOTEs (produced by default in the clang images): # diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e5e5dd8e9d9..0edab8df1be6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,15 +17,9 @@ repos: hooks: - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort (python) - args: ["--settings-path", "python-package/pyproject.toml"] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.7.0 + rev: v0.8.3 hooks: # Run the linter. 
- id: ruff @@ -38,4 +32,10 @@ repos: - repo: https://github.com/shellcheck-py/shellcheck-py rev: v0.10.0.1 hooks: - - id: shellcheck + - id: shellcheck + - repo: https://github.com/crate-ci/typos + rev: v1.28.3 + hooks: + - id: typos + args: ["--force-exclude"] + exclude: (\.gitignore$)|(^\.editorconfig$) diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 000000000000..6dc2c2c97529 --- /dev/null +++ b/.typos.toml @@ -0,0 +1,21 @@ +default.extend-ignore-re = [ + "/Ot", + "mis-alignment", + "mis-spelled", + "posix-seh-rt", +] + +[default.extend-words] +MAPE = "MAPE" +datas = "datas" +interprete = "interprete" +mape = "mape" +splitted = "splitted" + +[default.extend-identifiers] +ERRORs = "ERRORs" +GAM = "GAM" +ND24s = "ND24s" +WARNINGs = "WARNINGs" +fullset = "fullset" +thess = "thess" diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 3a111e10898e..40424840c82d 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -69,15 +69,17 @@ jobs: # check disk usage print-diagnostics # remove old containers, container images, volumes - # ref: https://stackoverflow.com/a/32723127/3986677) + # ref: https://stackoverflow.com/a/32723127/3986677 + # ref: https://depot.dev/blog/docker-clear-cache#removing-everything-with-docker-system-prune echo "---- running 'docker system prune' ----" /tmp/docker system prune \ --all \ --force \ + --volumes \ --filter until=720h # check disk usage again print-diagnostics - displayName: clean + displayName: Clean ########################################### - job: Linux ########################################### diff --git a/CMakeLists.txt b/CMakeLists.txt index 167c625a8607..45a8ecea0577 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ option(USE_SWIG "Enable SWIG to generate Java API" OFF) option(USE_TIMETAG "Set to ON to output time costs" OFF) option(USE_CUDA "Enable CUDA-accelerated training " OFF) option(USE_DEBUG "Set to ON for Debug mode" OFF) -option(USE_SANITIZER "Use santizer flags" OFF) +option(USE_SANITIZER "Use sanitizer flags" OFF) set( ENABLED_SANITIZERS "address" "leak" "undefined" diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index a13516ff6569..85a91b1ce058 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -1114,7 +1114,7 @@ predict.lgb.Booster <- function(object, #' #' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} #' will cause it to ignore the fast-predict configuration and take the slow route instead -#' (but be aware that an existing configuration might not always be overriden by supplying +#' (but be aware that an existing configuration might not always be overridden by supplying #' different parameters or prediction type, so make sure to check that the output is what #' was expected when a prediction is to be made on a single row for something different than #' what is configured). @@ -1128,7 +1128,7 @@ predict.lgb.Booster <- function(object, #' and as such, this function will produce an error if passing \code{csr=TRUE} and #' \code{type = "contrib"} together. #' @inheritParams lgb_predict_shared_params -#' @param model LighGBM model object (class \code{lgb.Booster}). +#' @param model LightGBM model object (class \code{lgb.Booster}). #' #' \bold{The object will be modified in-place}. #' @param csr Whether the prediction function is going to be called on sparse CSR inputs. 
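(Aside, not part of this patch: the lgb.Booster.R documentation above describes how the fast-predict configuration is set up once and then reused for single-row predictions. A minimal R sketch of that workflow, using the bundled agaricus data and illustrative training parameters chosen here for brevity — only the function names come from the package docs shown above:)

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    # small illustrative model; parameters are placeholders, not from this patch
    model <- lgb.train(params = list(objective = "binary"), data = dtrain, nrounds = 5L)
    # cache the fast-predict configuration for dense single-row inputs (csr = FALSE is the default)
    lgb.configure_fast_predict(model)
    # a single dense row uses the cached configuration; other prediction types take the slow route
    x1 <- as.matrix(agaricus.train$data[1L, , drop = FALSE])
    pred <- predict(model, x1)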
diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 7c76131f4f53..d60507cf00d4 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -9,7 +9,7 @@ #' \item{\code{Feature}: Feature names in the model.} #' \item{\code{Gain}: The total gain of this feature's splits.} #' \item{\code{Cover}: The number of observation related to this feature.} -#' \item{\code{Frequency}: The number of times a feature splited in trees.} +#' \item{\code{Frequency}: The number of times a feature split in trees.} #' } #' #' @examples diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index db4ef955f866..ac1b2f9aaf14 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -10,7 +10,7 @@ #' \emph{New in version 4.4.0} #' #' @return -#' A \code{data.table} with detailed information about model trees' nodes and leafs. +#' A \code{data.table} with detailed information about model trees' nodes and leaves. #' #' The columns of the \code{data.table} are: #' diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index efa593ffe12f..6cb4eebd8baf 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -139,7 +139,7 @@ NULL #' system, but be aware that getting the number of cores detected correctly requires package #' \code{RhpcBLASctl} to be installed. #' -#' This parameter gets overriden by \code{num_threads} and its aliases under \code{params} +#' This parameter gets overridden by \code{num_threads} and its aliases under \code{params} #' if passed there. #' #' \emph{New in version 4.0.0} diff --git a/R-package/configure b/R-package/configure index 11d691674f69..56a1fcc49105 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1789,7 +1789,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mmprefetch}" >&5 printf "%s\n" "${ac_mmprefetch}" >&6; } if test "${ac_mmprefetch}" = yes; then - LGB_CPPFLAGS+=" -DMM_PREFETCH=1" + LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1" fi ############ @@ -1824,7 +1824,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mm_malloc}" >&5 printf "%s\n" "${ac_mm_malloc}" >&6; } if test "${ac_mm_malloc}" = yes; then - LGB_CPPFLAGS+=" -DMM_MALLOC=1" + LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1" fi ########## @@ -1850,11 +1850,11 @@ then # If Homebrew is found and libomp was installed with it, this code adds the necessary # flags for the compiler to find libomp headers and for the linker to find libomp.dylib. HOMEBREW_LIBOMP_PREFIX="" - if command -v brew &> /dev/null; then + if command -v brew >/dev/null 2>&1; then ac_brew_openmp=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP was installed via Homebrew" >&5 printf %s "checking whether OpenMP was installed via Homebrew... 
" >&6; } - brew --prefix libomp &>/dev/null && ac_brew_openmp=yes + brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_brew_openmp}" >&5 printf "%s\n" "${ac_brew_openmp}" >&6; } if test "${ac_brew_openmp}" = yes; then diff --git a/R-package/configure.ac b/R-package/configure.ac index dad365be691c..d0f0462aef60 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -60,7 +60,7 @@ AC_LANG_CONFTEST( ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mmprefetch=yes AC_MSG_RESULT([${ac_mmprefetch}]) if test "${ac_mmprefetch}" = yes; then - LGB_CPPFLAGS+=" -DMM_PREFETCH=1" + LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1" fi ############ @@ -86,7 +86,7 @@ AC_LANG_CONFTEST( ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mm_malloc=yes AC_MSG_RESULT([${ac_mm_malloc}]) if test "${ac_mm_malloc}" = yes; then - LGB_CPPFLAGS+=" -DMM_MALLOC=1" + LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1" fi ########## @@ -112,10 +112,10 @@ then # If Homebrew is found and libomp was installed with it, this code adds the necessary # flags for the compiler to find libomp headers and for the linker to find libomp.dylib. HOMEBREW_LIBOMP_PREFIX="" - if command -v brew &> /dev/null; then + if command -v brew >/dev/null 2>&1; then ac_brew_openmp=no AC_MSG_CHECKING([whether OpenMP was installed via Homebrew]) - brew --prefix libomp &>/dev/null && ac_brew_openmp=yes + brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes AC_MSG_RESULT([${ac_brew_openmp}]) if test "${ac_brew_openmp}" = yes; then HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp` diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 0324f83f2da9..9f74ef7f4b2a 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -51,7 +51,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 6ca214c5ac7b..4435dd1b09b6 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -29,7 +29,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # The built-in evaluation error assumes input is after logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index e02600451df5..9cd4339bdced 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ 
b/R-package/man/lgb.configure_fast_predict.Rd @@ -14,7 +14,7 @@ lgb.configure_fast_predict( ) } \arguments{ -\item{model}{LighGBM model object (class \code{lgb.Booster}). +\item{model}{LightGBM model object (class \code{lgb.Booster}). \bold{The object will be modified in-place}.} @@ -98,7 +98,7 @@ Calling this function multiple times with different parameters might not overrid Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} will cause it to ignore the fast-predict configuration and take the slow route instead - (but be aware that an existing configuration might not always be overriden by supplying + (but be aware that an existing configuration might not always be overridden by supplying different parameters or prediction type, so make sure to check that the output is what was expected when a prediction is to be made on a single row for something different than what is configured). diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 79cb82f5d8ef..5099643112be 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -17,7 +17,7 @@ For a tree model, a \code{data.table} with the following columns: \item{\code{Feature}: Feature names in the model.} \item{\code{Gain}: The total gain of this feature's splits.} \item{\code{Cover}: The number of observation related to this feature.} - \item{\code{Frequency}: The number of times a feature splited in trees.} + \item{\code{Frequency}: The number of times a feature split in trees.} } } \description{ diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index ecfee17332f5..df36b6a94f42 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -18,7 +18,7 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L) \emph{New in version 4.4.0}} } \value{ -A \code{data.table} with detailed information about model trees' nodes and leafs. +A \code{data.table} with detailed information about model trees' nodes and leaves. The columns of the \code{data.table} are: diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 90cb3166bf5c..376a6d03a6b1 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -93,7 +93,7 @@ set to the iteration number of the best iteration.} system, but be aware that getting the number of cores detected correctly requires package \code{RhpcBLASctl} to be installed. - This parameter gets overriden by \code{num_threads} and its aliases under \code{params} + This parameter gets overridden by \code{num_threads} and its aliases under \code{params} if passed there. \emph{New in version 4.0.0}} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index c734816b4038..06d35a146d66 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -9,7 +9,7 @@ set.seed(708L) # to an accumulator then returns the current value. 
# This is used to mock the situation where an evaluation # metric increases every iteration -ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACCUMULATOR" assign(x = ACCUMULATOR_NAME, value = 0.0, envir = .GlobalEnv) .increasing_metric <- function(preds, dtrain) { @@ -1777,7 +1777,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th , early_stopping_rounds + 1L ) - # Booster should understand thatt all three of these metrics should be minimized + # Booster should understand that all three of these metrics should be minimized eval_info <- bst$.__enclos_env__$private$get_eval_info() expect_identical(eval_info, c("mape", "rmse", "l1")) expect_identical( @@ -2345,7 +2345,7 @@ test_that("early stopping works with lgb.cv()", { # never changes, its first iteration was the best oone expect_equal(bst$best_iter, 1L) - # best_score should be taken from the first metri + # best_score should be taken from the first metric expect_equal(bst$best_score, 0.2) # early stopping should have happened, since constant_metric was the first diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 2c10b9d571dc..a1baf0067c4a 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -14,7 +14,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- get_field(dtrain, "label") diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index 322a80a55bc5..cfcd1c942f31 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.intereprete works as expected for binary classification", { +test_that("lgb.interprete works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/tests/testthat/test_lgb.plot.interpretation.R b/R-package/tests/testthat/test_lgb.plot.interpretation.R index 6cba9927942a..e8a021fc7237 100644 --- a/R-package/tests/testthat/test_lgb.plot.interpretation.R +++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.plot.interepretation works as expected for binary classification", { +test_that("lgb.plot.interpretation works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -57,7 +57,7 @@ test_that("lgb.plot.interepretation works as expected for binary classification" expect_null(plot_res) }) -test_that("lgb.plot.interepretation works as expected for multiclass classification", { +test_that("lgb.plot.interpretation works as expected for multiclass classification", { data(iris) # We must convert factors to numeric diff --git a/README.md b/README.md index f151c9db2ebe..39108559e8bc 100644 --- a/README.md +++ 
b/README.md @@ -139,6 +139,8 @@ lightgbm-transform (feature transformation binding): https://github.com/microsof `postgresml` (LightGBM training and prediction in SQL, via a Postgres extension): https://github.com/postgresml/postgresml +`pyodide` (run `lightgbm` Python-package in a web browser): https://github.com/pyodide/pyodide + `vaex-ml` (Python DataFrame library with its own interface to LightGBM): https://github.com/vaexio/vaex Support diff --git a/biome.json b/biome.json new file mode 100644 index 000000000000..5029d037189e --- /dev/null +++ b/biome.json @@ -0,0 +1,21 @@ +{ + "files": { + "ignore": [".mypy_cache/"] + }, + "formatter": { + "enabled": true, + "useEditorconfig": true + }, + "organizeImports": { + "enabled": true + }, + "linter": { + "enabled": true, + "rules": { + "all": true + } + }, + "javascript": { + "globals": ["$"] + } +} diff --git a/build-python.sh b/build-python.sh index cf790737729e..ff37e4afe225 100755 --- a/build-python.sh +++ b/build-python.sh @@ -205,7 +205,6 @@ create_isolated_source_dir() { cp -R ./include ./lightgbm-python cp -R ./src ./lightgbm-python cp -R ./swig ./lightgbm-python - cp -R ./windows ./lightgbm-python # include only specific files from external_libs, to keep the package # small and avoid redistributing code with licenses incompatible with @@ -303,8 +302,7 @@ if test "${INSTALL}" = true; then ./external_libs \ ./include \ ./src \ - ./swig \ - ./windows + ./swig # use regular-old setuptools for these builds, to avoid # trying to recompile the shared library sed -i.bak -e '/start:build-system/,/end:build-system/d' pyproject.toml diff --git a/cmake/Sanitizer.cmake b/cmake/Sanitizer.cmake index a3768effac0d..f99048476d8b 100644 --- a/cmake/Sanitizer.cmake +++ b/cmake/Sanitizer.cmake @@ -18,7 +18,7 @@ macro(enable_sanitizer sanitizer) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") else() - message(FATAL_ERROR "Santizer ${sanitizer} not supported.") + message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") endif() endmacro() diff --git a/docker/README.md b/docker/README.md index dfedc2f4e3f1..e68346545ccf 100644 --- a/docker/README.md +++ b/docker/README.md @@ -55,7 +55,7 @@ After this runs, a LightGBM model can be found at `LightGBM-CLI-model.txt`. For more details on how to configure and use the LightGBM CLI, see https://lightgbm.readthedocs.io/en/latest/Quick-Start.html. -## Running the Python-package Сontainer +## Running the Python-package Container Build an image with the LightGBM Python-package installed. @@ -114,7 +114,7 @@ docker run \ python ``` -## Running the R-package Сontainer +## Running the R-package Container Build an image with the LightGBM R-package installed. diff --git a/docs/.linkcheckerrc b/docs/.linkcheckerrc index 003d8699a875..a4707aa536ea 100644 --- a/docs/.linkcheckerrc +++ b/docs/.linkcheckerrc @@ -1,9 +1,9 @@ [checking] -maxrequestspersecond=1 +maxrequestspersecond=0.1 recursionlevel=1 anchors=1 sslverify=0 -threads=1 +threads=4 [filtering] ignore= diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 41b84f9b82c2..1e28d037388d 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -1,17 +1,30 @@ Installation Guide ================== -This is a guide for building the LightGBM Command Line Interface (CLI). If you want to build the Python-package or R-package please refer to `Python-package`_ and `R-package`_ folders respectively. - All instructions below are aimed at compiling the 64-bit version of LightGBM. 
It is worth compiling the 32-bit version only in very rare special cases involving environmental limitations. The 32-bit version is slow and untested, so use it at your own risk and don't forget to adjust some of the commands below when installing. +By default, instructions below will use **VS Build Tools** or the **make** tool to compile the code. +It is possible to use the `Ninja`_ tool instead of make on all platforms, but VS Build Tools cannot be replaced with Ninja. +You can add ``-G Ninja`` to CMake flags to use Ninja. + +By default, instructions below will produce a shared library file and an executable file with command-line interface. +You can add ``-DBUILD_CLI=OFF`` to CMake flags to disable the executable compilation. + If you need to build a static library instead of a shared one, you can add ``-DBUILD_STATIC_LIB=ON`` to CMake flags. +By default, instructions below will place header files into a system-wide folder. +You can add ``-DINSTALL_HEADERS=OFF`` to CMake flags to disable headers installation. + +By default, on macOS, CMake looks into Homebrew standard folders to find dependencies (e.g. OpenMP). +You can add ``-DUSE_HOMEBREW_FALLBACK=OFF`` to CMake flags to disable this behaviour. + Users who want to perform benchmarking can make LightGBM output time costs for different internal routines by adding ``-DUSE_TIMETAG=ON`` to CMake flags. -It is possible to build LightGBM in debug mode. In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM. +It is possible to build LightGBM in debug mode. +In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. +To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM. .. _sanitizers: @@ -30,7 +43,7 @@ It is very useful to build `C++ unit tests <#build-c-unit-tests>`__ with sanitiz .. _nightly-builds: -You can also download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|. +You can download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|. .. contents:: **Contents** :depth: 1 @@ -40,12 +53,10 @@ You can also download the artifacts of the latest successful build on master bra Windows ~~~~~~~ -On Windows LightGBM can be built using +On Windows, LightGBM can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ With GUI ******** -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +4. 
Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ***************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -98,7 +110,7 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended that you use **Visual Studio** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -106,9 +118,17 @@ It is recommended that you use **Visual Studio** since it has better multithread Linux ~~~~~ -On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. +On Linux, LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -1. Install `CMake`_. +gcc +^^^ + +1. Install `CMake`_ and **gcc**. 2. Run the following commands: @@ -119,53 +139,69 @@ On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. cmake -B build -S . cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +^^^^^ -Using ``Ninja`` -^^^^^^^^^^^^^^^ +1. Install `CMake`_, **Clang** and **OpenMP**. -On Linux, LightGBM can also be built with `Ninja `__ instead of ``make``. +2. Run the following commands: -.. code:: sh + .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -G 'Ninja' - cmake --build build -j2 + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . + cmake --build build -j4 macOS ~~~~~ -On macOS LightGBM can be installed using **Homebrew**, or can be built using **CMake** and **Apple Clang** or **gcc**. 
+On macOS, LightGBM can be installed using -Apple Clang -^^^^^^^^^^^ +- **Homebrew**; +- **MacPorts**; + +or can be built using -Only **Apple Clang** version 8.1 or higher is supported. +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. Install Using ``Homebrew`` -************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: sh brew install lightgbm -Build from GitHub -***************** +Refer to https://formulae.brew.sh/formula/lightgbm for more details. -1. Install `CMake`_ : +Install Using ``MacPorts`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ - .. code:: sh +.. code:: sh - brew install cmake + sudo port install LightGBM + +Refer to https://ports.macports.org/port/LightGBM for more details. + +**Note**: Port for LightGBM is not maintained by LightGBM's maintainers. -2. Install **OpenMP**: +Build from GitHub +^^^^^^^^^^^^^^^^^ + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. + +Apple Clang +*********** + +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake libomp -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -175,21 +211,15 @@ Build from GitHub cmake --build build -j4 gcc -^^^ - -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake +*** -2. Install **gcc**: +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake gcc -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -213,12 +243,10 @@ You can build LightGBM without OpenMP support but it is **strongly not recommend Windows ^^^^^^^ -On Windows a version of LightGBM without OpenMP support can be built using +On Windows, a version of LightGBM without OpenMP support can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) @@ -227,26 +255,27 @@ Visual Studio (or VS Build Tools) With GUI -------- -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. Open ``LightGBM.sln`` file with **Visual Studio**. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library. -5. Go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. +5. Go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. -6. Get back to the project's main screen, then choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +6. Get back to the project's main screen and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. 
+The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ----------------- -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -275,14 +304,36 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. Linux ^^^^^ -On Linux a version of LightGBM without OpenMP support can be built using **CMake** and **gcc** or **Clang**. +On Linux, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_ and **gcc**. + +2. Run the following commands: + + .. code:: sh -1. Install `CMake`_. + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_OPENMP=OFF + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_ and **Clang**. 2. Run the following commands: @@ -290,20 +341,24 @@ On Linux a version of LightGBM without OpenMP support can be built using **CMake git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine cmake -B build -S . -DUSE_OPENMP=OFF cmake --build build -j4 macOS ^^^^^ -On macOS a version of LightGBM without OpenMP support can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_: .. code:: sh @@ -321,19 +376,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : .. code:: sh - brew install cmake + brew install cmake gcc -2. Install **gcc**: - - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -354,35 +403,36 @@ If you need to run a distributed learning application with high performance comm Windows ^^^^^^^ -On Windows an MPI version of LightGBM can be built using +On Windows, an MPI version of LightGBM can be built using - **MS MPI** and **Visual Studio**; - - **MS MPI**, **CMake** and **VS Build Tools**. +**Note**: Building the MPI version with **MinGW** is not supported because MinGW lacks an MPI library. + With GUI ******** 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed. -2. Install `Visual Studio`_ (2015 or newer). +2. Install `Visual Studio`_. 3. 
Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -4. Go to ``LightGBM-master/windows`` folder. +4. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release_mpi`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release_mpi`` folder. From Command Line ***************** 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed. -2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 3. Run the following commands: @@ -395,18 +445,22 @@ From Command Line The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder. -**Note**: Building MPI version by **MinGW** is not supported due to the miss of MPI library in it. - Linux ^^^^^ -On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **gcc** or **Clang**. +On Linux, an MPI version of LightGBM can be built using -1. Install `Open MPI`_. +- **CMake**, **gcc** and **Open MPI**; +- **CMake**, **Clang** and **Open MPI**. -2. Install `CMake`_. +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -3. Run the following commands: +gcc +*** + +1. Install `CMake`_, **gcc** and `Open MPI`_. + +2. Run the following commands: .. code:: sh @@ -415,37 +469,41 @@ On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** a cmake -B build -S . -DUSE_MPI=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - -macOS -^^^^^ +Clang +***** -On macOS an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **Apple Clang** or **gcc**. +1. Install `CMake`_, **Clang**, **OpenMP** and `Open MPI`_. -Apple Clang -*********** +2. Run the following commands: -Only **Apple Clang** version 8.1 or higher is supported. + .. code:: sh -1. Install `CMake`_ : + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_MPI=ON + cmake --build build -j4 - .. code:: sh +macOS +^^^^^ - brew install cmake +On macOS, an MPI version of LightGBM can be built using -2. Install **OpenMP**: +- **CMake**, **Open MPI** and **Apple Clang**; +- **CMake**, **Open MPI** and **gcc**. - .. code:: sh +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. 
- brew install libomp +Apple Clang +*********** -3. Install **Open MPI**: +1. Install `CMake`_, **OpenMP** and `Open MPI`_: .. code:: sh - brew install open-mpi + brew install cmake libomp open-mpi -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -457,25 +515,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake - -2. Install **gcc**: +1. Install `CMake`_, `Open MPI`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake open-mpi gcc -3. Install **Open MPI**: - - .. code:: sh - - brew install open-mpi - -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -488,48 +534,19 @@ gcc Build GPU Version ~~~~~~~~~~~~~~~~~ -Linux -^^^^^ - -On Linux a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **gcc** or **Clang**. - -The following dependencies should be installed before compilation: - -- **OpenCL** 1.2 headers and libraries, which is usually provided by GPU manufacture. - - The generic OpenCL ICD packages (for example, Debian package ``ocl-icd-libopencl1`` and ``ocl-icd-opencl-dev``) can also be used. - -- **libboost** 1.56 or later (1.61 or later is recommended). - - We use Boost.Compute as the interface to GPU, which is part of the Boost library since version 1.61. However, since we include the source code of Boost.Compute as a submodule, we only require the host has Boost 1.56 or later installed. We also use Boost.Align for memory allocation. Boost.Compute requires Boost.System and Boost.Filesystem to store offline kernel cache. - - The following Debian packages should provide necessary Boost libraries: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``. - -- **CMake** - -To build LightGBM GPU version, run the following commands: - -.. code:: sh - - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_GPU=1 - # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ - cmake --build build - -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - Windows ^^^^^^^ -On Windows a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **OpenCL**, **Boost**, **CMake** and **VS Build Tools**; +- **OpenCL**, **Boost**, **CMake** and **MinGW**. If you use **MinGW**, the build procedure is similar to the build on Linux. Following procedure is for the **MSVC** (Microsoft Visual C++) build. -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is installed). 2. Install **OpenCL** for Windows. The installation depends on the brand (NVIDIA, AMD, Intel) of your GPU card. @@ -559,13 +576,68 @@ Following procedure is for the **MSVC** (Microsoft Visual C++) build. git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . 
-A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 + cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include" + # cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include" cmake --build build --target ALL_BUILD --config Release **Note**: ``C:/local/boost_1_63_0`` and ``C:/local/boost_1_63_0/lib64-msvc-14.0`` are locations of your **Boost** binaries (assuming you've downloaded 1.63.0 version for Visual Studio 2015). +The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder. + +Linux +^^^^^ + +On Linux, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **CMake**, **OpenCL**, **Boost** and **gcc**; +- **CMake**, **OpenCL**, **Boost** and **Clang**. + +**OpenCL** headers and libraries are usually provided by GPU manufacture. +The generic OpenCL ICD packages (for example, Debian packages ``ocl-icd-libopencl1``, ``ocl-icd-opencl-dev``, ``pocl-opencl-icd``) can also be used. + +Required **Boost** libraries (Boost.Align, Boost.System, Boost.Filesystem, Boost.Chrono) should be provided by the following Debian packages: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``, ``libboost-chrono-dev``. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_, **gcc**, **OpenCL** and **Boost**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP**, **OpenCL** and **Boost**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +macOS +^^^^^ + +The GPU version is not supported on macOS. + Docker ^^^^^^ @@ -574,60 +646,84 @@ Refer to `GPU Docker folder `__ of LightGBM (``device_type=gpu``) is based on OpenCL. 
+The `original GPU version <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL. -The CUDA-based build (``device_type=cuda``) is a separate implementation. +The CUDA-based version (``device_type=cuda``) is a separate implementation. Use this version in Linux environments with an NVIDIA GPU with compute capability 6.0 or higher. +Windows +^^^^^^^ + +The CUDA version is not supported on Windows. +Use the `GPU version <#build-gpu-version>`__ (``device_type=gpu``) for GPU acceleration on Windows. + Linux ^^^^^ -On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and **gcc** or **Clang**. +On Linux, a CUDA version of LightGBM can be built using -The following dependencies should be installed before compilation: +- **CMake**, **gcc** and **CUDA**; +- **CMake**, **Clang** and **CUDA**. -- **CUDA** 11.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers. +Please refer to `this detailed guide`_ for **CUDA** libraries installation. -- **CMake** +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -To build LightGBM CUDA version, run the following commands: +gcc +*** -.. code:: sh +1. Install `CMake`_, **gcc** and **CUDA**. + +2. Run the following commands: - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_CUDA=1 - cmake --build build -j4 + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP** and **CUDA**. + +2. Run the following commands: -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 macOS ^^^^^ The CUDA version is not supported on macOS. -Windows -^^^^^^^ - -The CUDA version is not supported on Windows. -Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows. - Build Java Wrapper ~~~~~~~~~~~~~~~~~~ Using the following instructions you can generate a JAR file containing the LightGBM `C API <./Development-Guide.rst#c-api>`__ wrapped by **SWIG**. +After compilation the ``.jar`` file will be in ``LightGBM/build`` folder. + Windows ^^^^^^^ -On Windows a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a Java wrapper of LightGBM can be built using + +- **Java**, **SWIG**, **CMake** and **VS Build Tools**; +- **Java**, **SWIG**, **CMake** and **MinGW**. VS Build Tools ************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. 
Run the following commands: @@ -638,14 +734,12 @@ VS Build Tools cmake -B build -S . -A x64 -DUSE_SWIG=ON cmake --build build --target ALL_BUILD --config Release -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/Release`` folder. - MinGW-w64 ********* 1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -656,9 +750,7 @@ MinGW-w64 cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON cmake --build build -j4 -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/`` folder. - -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended to use **VS Build Tools (Visual Studio)** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -666,9 +758,15 @@ It is recommended to use **VS Build Tools (Visual Studio)** since it has better Linux ^^^^^ -On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **gcc** or **Clang**. +On Linux, a Java wrapper of LightGBM can be built using + +- **CMake**, **gcc**, **Java** and **SWIG**; +- **CMake**, **Clang**, **Java** and **SWIG**. + +gcc +*** -1. Install `CMake`_, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +1. Install `CMake`_, **gcc**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 2. Run the following commands: @@ -679,34 +777,40 @@ On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMa cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +***** -macOS -^^^^^ +1. Install `CMake`_, **Clang**, **OpenMP**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). -On macOS a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **Apple Clang** or **gcc**. +2. Run the following commands: -First, install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). -Then, either follow the **Apple Clang** or **gcc** installation instructions below. + .. code:: sh -Apple Clang -*********** + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_SWIG=ON + cmake --build build -j4 -Only **Apple Clang** version 8.1 or higher is supported. +macOS +^^^^^ -1. Install `CMake`_ : +On macOS, a Java wrapper of LightGBM can be built using - .. 
code:: sh +- **CMake**, **Java**, **SWIG** and **Apple Clang**; +- **CMake**, **Java**, **SWIG** and **gcc**. - brew install cmake +Apple Clang +*********** -2. Install **OpenMP**: +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake openjdk swig libomp + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -718,19 +822,14 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: - - .. code:: sh - - brew install gcc + brew install cmake openjdk swig gcc + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -740,15 +839,31 @@ gcc cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 +Build Python-package +~~~~~~~~~~~~~~~~~~~~ + +Refer to `Python-package folder `__. + +Build R-package +~~~~~~~~~~~~~~~ + +Refer to `R-package folder `__. + Build C++ Unit Tests ~~~~~~~~~~~~~~~~~~~~ Windows ^^^^^^^ -On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Build Tools**. +On Windows, C++ unit tests of LightGBM can be built using + +- **CMake** and **VS Build Tools**; +- **CMake** and **MinGW**. + +VS Build Tools +************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -756,17 +871,43 @@ On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Bui git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm --config Debug The ``.exe`` file will be in ``LightGBM/Debug`` folder. +MinGW-w64 +********* + +1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. + +2. Run the following commands: + + .. code:: console + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +The ``.exe`` file will be in ``LightGBM/`` folder. + +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. + Linux ^^^^^ -On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** or **Clang**. +On Linux, a C++ unit tests of LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable file will be in ``LightGBM/`` folder. + +gcc +*** -1. Install `CMake`_. +1. Install `CMake`_ and **gcc**. 2. 
Run the following commands: @@ -774,24 +915,42 @@ On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** o git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +Clang +***** + +1. Install `CMake`_, **Clang** and **OpenMP**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 macOS ^^^^^ -On macOS a C++ unit tests of LightGBM can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a C++ unit tests of LightGBM can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable file will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install cmake + brew install cmake libomp 2. Run the following commands: @@ -799,42 +958,32 @@ Only **Apple Clang** version 8.1 or higher is supported. git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 gcc *** -1. Install `CMake`_ : +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: + brew install cmake gcc - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 .. |download artifacts| image:: ./_static/images/artifacts-not-available.svg :target: https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html -.. _Python-package: https://github.com/microsoft/LightGBM/tree/master/python-package - -.. _R-package: https://github.com/microsoft/LightGBM/tree/master/R-package - .. _Visual Studio: https://visualstudio.microsoft.com/downloads/ .. _Git for Windows: https://git-scm.com/download/win @@ -864,3 +1013,5 @@ gcc .. _this detailed guide: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html .. _following docs: https://github.com/google/sanitizers/wiki + +.. _Ninja: https://ninja-build.org diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 2b68314b8f15..ceaf746846f5 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -35,7 +35,7 @@ For example, in Python: .. 
code-block:: python - # use learning rate of 0.07, becase 'learning_rate' + # use learning rate of 0.07, because 'learning_rate' # is the primary parameter name lgb.train( params={ @@ -795,7 +795,7 @@ Dataset Parameters - it is recommended to rescale data before training so that features have similar mean and standard deviation - - **Note**: works only with ``cpu`` device type and ``serial`` tree learner + - **Note**: works only with ``cpu``, ``gpu`` device type and ``serial`` tree learner - **Note**: ``regression_l1`` objective is not supported with linear tree boosting diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 107a6a4969a3..c6d21713fe5c 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -1,69 +1,71 @@ -$(function() { +$(() => { /* Use wider container for the page content */ - $('.wy-nav-content').each(function() { this.style.setProperty('max-width', 'none', 'important'); }); + $(".wy-nav-content").each(function () { + this.style.setProperty("max-width", "none", "important"); + }); /* List each class property item on a new line https://github.com/microsoft/LightGBM/issues/5073 */ - if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') != -1) { - $('.py.property').each(function() { this.style.setProperty('display', 'inline', 'important'); }); - } - - /* Point to the same version of R API as the current docs version */ - var current_version_elems = $('.rst-current-version'); - if(current_version_elems.length !== 0) { - var current_version = $(current_version_elems[0]).contents().filter(function() { - return this.nodeType == 3; - }).text().trim().split(' ').pop(); - if(current_version !== 'latest') { - $('a.reference.external[href$="/latest/R/reference/"]').each(function() { - $(this).attr('href', function (_, val) { return val.replace('/latest/', '/' + current_version + '/'); }); - }); - } + if (window.location.pathname.toLocaleLowerCase().indexOf("pythonapi") !== -1) { + $(".py.property").each(function () { + this.style.setProperty("display", "inline", "important"); + }); } /* Collapse specified sections in the installation guide */ - if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') != -1) { - $('').appendTo('body'); - var collapsable = [ - '#build-threadless-version-not-recommended', - '#build-mpi-version', - '#build-gpu-version', - '#build-cuda-version', - '#build-java-wrapper', - '#build-c-unit-tests' + if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { + $( + '', + ).appendTo("body"); + const collapsible = [ + "#build-threadless-version-not-recommended", + "#build-mpi-version", + "#build-gpu-version", + "#build-cuda-version", + "#build-java-wrapper", + "#build-python-package", + "#build-r-package", + "#build-c-unit-tests", ]; - $.each(collapsable, function(_, val) { - var header = val + ' > :header:first'; - var content = val + ' :not(:header:first)'; - $(header).addClass('closed'); + $.each(collapsible, (_, val) => { + const header = `${val} > :header:first`; + const content = `${val} :not(:header:first)`; + $(header).addClass("closed"); $(content).hide(); - $(header).click(function() { - $(header).toggleClass('closed opened'); + $(header).click(() => { + $(header).toggleClass("closed opened"); $(content).slideToggle(0); }); }); /* Uncollapse parent sections when nested section is specified in the URL or before navigate to it from navbar */ function uncollapse(section) { - section.parents().each((_, val) => { $(val).children('.closed').click(); }); + 
section.parents().each((_, val) => { + $(val).children(".closed").click(); + }); } uncollapse($(window.location.hash)); - $('.wy-menu.wy-menu-vertical li a.reference.internal').click(function() { - uncollapse($($(this).attr('href'))); + $(".wy-menu.wy-menu-vertical li a.reference.internal").click(function () { + uncollapse($($(this).attr("href"))); }); /* Modify src and href attrs of artifacts badge */ function modifyBadge(src, href) { - $('img[alt="download artifacts"]').each(function() { + $('img[alt="download artifacts"]').each(function () { this.src = src; this.parentNode.href = href; }); } /* Initialize artifacts badge */ - modifyBadge('./_static/images/artifacts-fetching.svg', '#'); + modifyBadge("./_static/images/artifacts-fetching.svg", "#"); /* Fetch latest buildId and construct artifacts badge */ - $.getJSON('https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7', function(data) { - modifyBadge('./_static/images/artifacts-download.svg', - 'https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/' + data['value'][0]['id'] + '/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip'); - }); + $.getJSON( + "https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7", + (data) => { + modifyBadge( + "./_static/images/artifacts-download.svg", + `https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/${data.value[0].id}/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip`, + ); + }, + ); } }); diff --git a/docs/conf.py b/docs/conf.py index f8bd29a69922..256787bf7f8d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,6 +39,7 @@ sys.path.insert(0, str(LIB_PATH)) INTERNAL_REF_REGEX = compile(r"(?P\.\/.+)(?P\.rst)(?P$|#)") +RTD_R_REF_REGEX = compile(r"(?Phttps://.+/)(?Platest)(?P/R/reference/)") class InternalRefTransform(Transform): @@ -69,6 +70,7 @@ def run(self) -> List: os.environ["LIGHTGBM_BUILD_DOC"] = "1" C_API = os.environ.get("C_API", "").lower().strip() != "no" RTD = bool(os.environ.get("READTHEDOCS", "")) +RTD_VERSION = os.environ.get("READTHEDOCS_VERSION", "stable") # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = "2.1.0" # Due to sphinx.ext.napoleon, autodoc_typehints @@ -309,6 +311,22 @@ def generate_r_docs(app: Sphinx) -> None: raise Exception(f"An error has occurred while generating documentation for R-package\n{e}") +def replace_reference_to_r_docs(app: Sphinx) -> None: + """Make reference to R-package documentation point to the actual version. + + Parameters + ---------- + app : sphinx.application.Sphinx + The application object representing the Sphinx process. + """ + index_doc_path = CURR_PATH / "index.rst" + with open(index_doc_path, "r+t", encoding="utf-8") as index_doc: + content = index_doc.read() + content = RTD_R_REF_REGEX.sub(rf"\g{RTD_VERSION}\g", content) + index_doc.seek(0) + index_doc.write(content) + + def setup(app: Sphinx) -> None: """Add new elements at Sphinx initialization time. 
@@ -330,6 +348,7 @@ def setup(app: Sphinx) -> None: app.connect( "build-finished", lambda app, _: copytree(CURR_PATH.parent / "lightgbm_r" / "docs", Path(app.outdir) / "R") ) + app.connect("builder-inited", replace_reference_to_r_docs) app.add_transform(InternalRefTransform) add_js_file = getattr(app, "add_js_file", False) or app.add_javascript add_js_file("js/script.js") diff --git a/examples/binary_classification/forced_splits.json b/examples/binary_classification/forced_splits.json index 1ee410c9789e..b09391a87f49 100644 --- a/examples/binary_classification/forced_splits.json +++ b/examples/binary_classification/forced_splits.json @@ -1,6 +1,6 @@ { "feature": 25, - "threshold": 1.30, + "threshold": 1.3, "left": { "feature": 26, "threshold": 0.85 diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index 2aa2113b40d4..f007dcd6fe66 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -64,7 +64,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. # num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb index cc8efa2c187b..a8abdf325d9d 100644 --- a/examples/python-guide/notebooks/interactive_plot_example.ipynb +++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb @@ -30,7 +30,7 @@ "try:\n", " # To enable interactive mode you should install ipywidgets\n", " # https://github.com/jupyter-widgets/ipywidgets\n", - " from ipywidgets import interact, SelectMultiple\n", + " from ipywidgets import SelectMultiple, interact\n", "\n", " INTERACTIVE = True\n", "except ImportError:\n", diff --git a/examples/regression/forced_bins.json b/examples/regression/forced_bins.json index 1ee0a49d727c..19722afbbb4b 100644 --- a/examples/regression/forced_bins.json +++ b/examples/regression/forced_bins.json @@ -1,10 +1,10 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.3, 0.35, 0.4 ] + "bin_upper_bound": [0.3, 0.35, 0.4] }, { "feature": 1, - "bin_upper_bound": [ -0.1, -0.15, -0.2 ] + "bin_upper_bound": [-0.1, -0.15, -0.2] } ] diff --git a/examples/regression/forced_bins2.json b/examples/regression/forced_bins2.json index f4dca0ccaf34..d6454f8a4ae9 100644 --- a/examples/regression/forced_bins2.json +++ b/examples/regression/forced_bins2.json @@ -1,6 +1,6 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.19, 0.39, 0.59, 0.79 ] + "bin_upper_bound": [0.19, 0.39, 0.59, 0.79] } ] diff --git a/examples/regression/train.conf b/examples/regression/train.conf index cd910af61dcf..992bc6c9ab53 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -20,7 +20,7 @@ objective = regression # binary_error metric = l2 -# frequence for metric output +# frequency for metric output metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric @@ -36,12 +36,12 @@ max_bin = 255 # forcedbins_filename = forced_bins.json # training data -# if exsting weight file, should name to "regression.train.weight" +# if existing weight file, should name to "regression.train.weight" # alias: train_data, train data = regression.train # validation data, support multi validation data, separated by ',' -# if exsting weight file, should name to 
"regression.test.weight" +# if existing weight file, should name to "regression.test.weight" # alias: valid, test, test_data, valid_data = regression.test @@ -62,7 +62,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. # num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration @@ -72,7 +72,7 @@ feature_fraction = 0.9 # Support bagging (data sub-sample), will perform bagging every 5 iterations bagging_freq = 5 -# Bagging farction, will random select 80% data on bagging +# Bagging fraction, will random select 80% data on bagging # alias: sub_row bagging_fraction = 0.8 diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index a33fcfa9c45c..5826f2387102 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index aa75a2eb0bc4..e4a2ade69c01 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -664,7 +664,7 @@ struct Config { // desc = categorical features are used for splits as normal but are not used in the linear models // desc = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R // desc = it is recommended to rescale data before training so that features have similar mean and standard deviation - // desc = **Note**: works only with ``cpu`` device type and ``serial`` tree learner + // desc = **Note**: works only with ``cpu``, ``gpu`` device type and ``serial`` tree learner // desc = **Note**: ``regression_l1`` objective is not supported with linear tree boosting // desc = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM // desc = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves diff --git a/include/LightGBM/cuda/cuda_algorithms.hpp b/include/LightGBM/cuda/cuda_algorithms.hpp index f79fc57e4f42..abda07b1582f 100644 --- a/include/LightGBM/cuda/cuda_algorithms.hpp +++ b/include/LightGBM/cuda/cuda_algorithms.hpp @@ -115,7 +115,7 @@ __device__ __forceinline__ T ShuffleReduceSumWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceSum(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -145,7 +145,7 @@ __device__ __forceinline__ T ShuffleReduceMaxWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceMax(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -196,7 +196,7 @@ __device__ __forceinline__ T ShuffleReduceMinWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block 
(block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceMin(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; diff --git a/include/LightGBM/cuda/cuda_column_data.hpp b/include/LightGBM/cuda/cuda_column_data.hpp index 1aa1edb05f4b..496142c8d67a 100644 --- a/include/LightGBM/cuda/cuda_column_data.hpp +++ b/include/LightGBM/cuda/cuda_column_data.hpp @@ -14,6 +14,7 @@ #include #include +#include #include namespace LightGBM { diff --git a/include/LightGBM/cuda/cuda_row_data.hpp b/include/LightGBM/cuda/cuda_row_data.hpp index 5519bb9a46e1..b3b33d48b891 100644 --- a/include/LightGBM/cuda/cuda_row_data.hpp +++ b/include/LightGBM/cuda/cuda_row_data.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #define COPY_SUBROW_BLOCK_SIZE_ROW_DATA (1024) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index b4f7ec025c64..702f6d600496 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -376,7 +377,7 @@ class Metadata { std::vector query_boundaries_; /*! \brief Query weights */ std::vector query_weights_; - /*! \brief Number of querys */ + /*! \brief Number of queries */ data_size_t num_queries_; /*! \brief Number of Initial score, used to check correct weight file */ int64_t num_init_score_; diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index f13a5fff966f..bcc0388ba507 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/train_share_states.h b/include/LightGBM/train_share_states.h index f102668edf70..e4f4e4afea5f 100644 --- a/include/LightGBM/train_share_states.h +++ b/include/LightGBM/train_share_states.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index c28ddd140c48..bc5af621e402 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 6c3ebf5d0096..67bc07b0ecd5 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -925,11 +925,11 @@ class AlignmentAllocator { inline ~AlignmentAllocator() throw() {} - inline pointer adress(reference r) { + inline pointer address(reference r) { return &r; } - inline const_pointer adress(const_reference r) const { + inline const_pointer address(const_reference r) const { return &r; } diff --git a/include/LightGBM/utils/random.h b/include/LightGBM/utils/random.h index 6f89f935b310..eb115ea96644 100644 --- a/include/LightGBM/utils/random.h +++ b/include/LightGBM/utils/random.h @@ -22,9 +22,9 @@ class Random { */ Random() { std::random_device rd; - auto genrator = std::mt19937(rd()); + auto generator = std::mt19937(rd()); std::uniform_int_distribution distribution(0, x); - x = distribution(genrator); + x = distribution(generator); } /*! * \brief Constructor, with specific seed diff --git a/python-package/README.rst b/python-package/README.rst index 0e007e5ee7ec..face6bba6b74 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -286,7 +286,7 @@ To check that a contribution to the package matches its style expectations, run .. code:: sh - bash .ci/lint-python.sh + bash .ci/lint-python-bash.sh .. 
|License| image:: https://img.shields.io/github/license/microsoft/lightgbm.svg :target: https://github.com/microsoft/LightGBM/blob/master/LICENSE diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index cf3723aadc63..e06290dc1c5f 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -188,6 +188,23 @@ def _get_sample_count(total_nrow: int, params: str) -> int: return sample_cnt.value +def _np2d_to_np1d(mat: np.ndarray) -> Tuple[np.ndarray, int]: + if mat.dtype in (np.float32, np.float64): + dtype = mat.dtype + else: + dtype = np.float32 + if mat.flags["F_CONTIGUOUS"]: + order = "F" + layout = _C_API_IS_COL_MAJOR + else: + order = "C" + layout = _C_API_IS_ROW_MAJOR + # ensure dtype and order, copies if either do not match + data = np.asarray(mat, dtype=dtype, order=order) + # flatten array without copying + return data.ravel(order=order), layout + + class _MissingType(Enum): NONE = "None" NAN = "NaN" @@ -684,7 +701,8 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va _C_API_DTYPE_INT32 = 2 _C_API_DTYPE_INT64 = 3 -"""Matrix is row major in Python""" +"""Macro definition of data order in matrix""" +_C_API_IS_COL_MAJOR = 0 _C_API_IS_ROW_MAJOR = 1 """Macro definition of prediction type in C API of LightGBM""" @@ -1273,10 +1291,7 @@ def __inner_predict_np2d( predict_type: int, preds: Optional[np.ndarray], ) -> Tuple[np.ndarray, int]: - if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) - else: # change non-float data to float data, need to copy - data = np.array(mat.reshape(mat.size), dtype=np.float32) + data, layout = _np2d_to_np1d(mat) ptr_data, type_ptr_data, _ = _c_float_array(data) n_preds = self.__get_num_preds( start_iteration=start_iteration, @@ -1296,7 +1311,7 @@ def __inner_predict_np2d( ctypes.c_int(type_ptr_data), ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[1]), - ctypes.c_int(_C_API_IS_ROW_MAJOR), + ctypes.c_int(layout), ctypes.c_int(predict_type), ctypes.c_int(start_iteration), ctypes.c_int(num_iteration), @@ -2297,11 +2312,7 @@ def __init_from_np2d( raise ValueError("Input numpy.ndarray must be 2 dimensional") self._handle = ctypes.c_void_p() - if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) - else: # change non-float data to float data, need to copy - data = np.asarray(mat.reshape(mat.size), dtype=np.float32) - + data, layout = _np2d_to_np1d(mat) ptr_data, type_ptr_data, _ = _c_float_array(data) _safe_call( _LIB.LGBM_DatasetCreateFromMat( @@ -2309,7 +2320,7 @@ def __init_from_np2d( ctypes.c_int(type_ptr_data), ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[1]), - ctypes.c_int(_C_API_IS_ROW_MAJOR), + ctypes.c_int(layout), _c_str(params_str), ref_dataset, ctypes.byref(self._handle), @@ -2493,13 +2504,13 @@ def _compare_params_for_warning( compare_result : bool Returns whether two dictionaries with params are equal. 
""" - for k in other_params: + for k, v in other_params.items(): if k not in ignore_keys: - if k not in params or params[k] != other_params[k]: + if k not in params or params[k] != v: return False - for k in params: + for k, v in params.items(): if k not in ignore_keys: - if k not in other_params or params[k] != other_params[k]: + if k not in other_params or v != other_params[k]: return False return True @@ -3525,7 +3536,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset": _log_warning(err_msg) self.feature_name = self.get_feature_name() _log_warning( - "Reseting categorical features.\n" + "Resetting categorical features.\n" "You can set new categorical features via ``set_categorical_feature`` method" ) self.categorical_feature = "auto" diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py index ae1e72c549d4..c64fb8ba755b 100644 --- a/python-package/lightgbm/callback.py +++ b/python-package/lightgbm/callback.py @@ -73,15 +73,13 @@ class CallbackEnv: def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str: """Format metric string.""" - if len(value) == 4: - return f"{value[0]}'s {value[1]}: {value[2]:g}" - elif len(value) == 5: - if show_stdv: - return f"{value[0]}'s {value[1]}: {value[2]:g} + {value[4]:g}" # type: ignore[misc] - else: - return f"{value[0]}'s {value[1]}: {value[2]:g}" - else: - raise ValueError("Wrong metric value") + dataset_name, metric_name, metric_value, *_ = value + out = f"{dataset_name}'s {metric_name}: {metric_value:g}" + # tuples from cv() sometimes have a 5th item, with standard deviation of + # the evaluation metric (taken over all cross-validation folds) + if show_stdv and len(value) == 5: + out += f" + {value[4]:g}" + return out class _LogEvaluationCallback: diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index e15979bc40db..dcdacba7366c 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -967,7 +967,7 @@ def _extract(items: List[Any], i: int) -> Any: out[i].append(part) # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix - # the code below is used instead to ensure that the sparse type is preserved during concatentation + # the code below is used instead to ensure that the sparse type is preserved during concatenation if isinstance(pred_meta, ss.csr_matrix): concat_fn = partial(ss.vstack, format="csr") elif isinstance(pred_meta, ss.csc_matrix): diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 89910599b0ca..dca6b607cdc7 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -3,7 +3,6 @@ import copy import json -import warnings from collections import OrderedDict, defaultdict from operator import attrgetter from pathlib import Path @@ -15,17 +14,14 @@ from .basic import ( Booster, Dataset, - LGBMDeprecationWarning, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _LGBM_BoosterEvalMethodResultType, _LGBM_BoosterEvalMethodResultWithStandardDeviationType, - _LGBM_CategoricalFeatureConfiguration, _LGBM_CustomObjectiveFunction, _LGBM_EvalFunctionResultType, - _LGBM_FeatureNameConfiguration, _log_warning, ) from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold @@ -54,15 +50,6 @@ ] -def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None: - msg = ( - f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in " - f"a future 
release. Set '{argname}' when calling lightgbm.Dataset() instead. " - "See https://github.com/microsoft/LightGBM/issues/6435." - ) - warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2) - - def _choose_num_iterations(num_boost_round_kwarg: int, params: Dict[str, Any]) -> Dict[str, Any]: """Choose number of boosting rounds. @@ -127,8 +114,6 @@ def train( valid_names: Optional[List[str]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", keep_training_booster: bool = False, callbacks: Optional[List[Callable]] = None, ) -> Booster: @@ -170,21 +155,6 @@ def train( set the ``metric`` parameter to the string ``"None"`` in ``params``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. keep_training_booster : bool, optional (default=False) Whether the returned Booster will be used to keep training. If False, the returned value will be converted into _InnerPredictor before returning. @@ -233,13 +203,6 @@ def train( f"Item {i} has type '{type(valid_item).__name__}'." 
) - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("train", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("train", "feature_name") - # create predictor first params = copy.deepcopy(params) params = _choose_param_value( @@ -278,9 +241,7 @@ def train( else: init_iteration = 0 - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) is_valid_contain_train = False train_data_name = "training" @@ -642,8 +603,6 @@ def cv( metrics: Optional[Union[str, List[str]]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", fpreproc: Optional[_LGBM_PreprocFunction] = None, seed: int = 0, callbacks: Optional[List[Callable]] = None, @@ -699,21 +658,6 @@ def cv( set ``metrics`` to the string ``"None"``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. fpreproc : callable or None, optional (default=None) Preprocessing function that takes (dtrain, dtest, params) and returns transformed versions of those. 
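The feature_name and categorical_feature keyword arguments removed from train() and cv() above were already documented as deprecated in favour of configuring the Dataset directly. A minimal sketch of the replacement usage, not part of the patch; array shapes, column names, and parameter values are illustrative:

import numpy as np
import lightgbm as lgb

X = np.random.rand(100, 3)
X[:, 0] = np.random.randint(0, 4, size=100)  # pretend column 0 is categorical
y = np.random.rand(100)

# instead of passing feature_name/categorical_feature to lgb.train() or lgb.cv(),
# declare them on the Dataset itself
train_set = lgb.Dataset(
    X,
    label=y,
    feature_name=["f0", "f1", "f2"],
    categorical_feature=["f0"],
)
booster = lgb.train({"objective": "regression", "verbosity": -1}, train_set, num_boost_round=5)
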
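Relatedly, the new _np2d_to_np1d helper added to basic.py earlier in this patch flattens a 2-D matrix in whichever order it is already contiguous, so Fortran-ordered input can be handed to the C API as _C_API_IS_COL_MAJOR without a copy. A rough standalone illustration of that flags check; the helper logic and the two constants mirror the patch, everything else is a sketch:

import numpy as np

_C_API_IS_COL_MAJOR = 0
_C_API_IS_ROW_MAJOR = 1

def flatten_preserving_layout(mat):
    # keep the matrix's own layout so asarray()/ravel() can avoid copying
    order = "F" if mat.flags["F_CONTIGUOUS"] else "C"
    layout = _C_API_IS_COL_MAJOR if order == "F" else _C_API_IS_ROW_MAJOR
    dtype = mat.dtype if mat.dtype in (np.float32, np.float64) else np.float32
    data = np.asarray(mat, dtype=dtype, order=order)
    return data.ravel(order=order), layout

col_major = np.asfortranarray(np.arange(6, dtype=np.float64).reshape(2, 3))
flat, layout = flatten_preserving_layout(col_major)
print(flat, layout)  # [0. 3. 1. 4. 2. 5.] 0 -- column-major flattening, no copy
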
@@ -767,13 +711,6 @@ def cv( if not isinstance(train_set, Dataset): raise TypeError(f"cv() only accepts Dataset object, train_set has type '{type(train_set).__name__}'.") - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("cv", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("cv", "feature_name") - params = copy.deepcopy(params) params = _choose_param_value( main_param_name="objective", @@ -818,9 +755,7 @@ def cv( params.pop(metric_alias, None) params["metric"] = metrics - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) results = defaultdict(list) cvfolds = _make_n_folds( diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index c4d1200e99e4..108ef1e14498 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -671,6 +671,15 @@ def __init__( # is >=1.6. # ref: https://github.com/microsoft/LightGBM/pull/6651 def _more_tags(self) -> Dict[str, Any]: + check_sample_weight_str = ( + "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " + "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " + "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " + "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." + ) + # "check_sample_weight_equivalence" can be removed when lightgbm's + # minimum supported scikit-learn version is at least 1.6 + # ref: https://github.com/scikit-learn/scikit-learn/pull/30137 return { "allow_nan": True, "X_types": ["2darray", "sparse", "1dlabels"], @@ -678,12 +687,9 @@ def _more_tags(self) -> Dict[str, Any]: "check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes " "cannot be set in __init__: " "(see https://github.com/microsoft/LightGBM/issues/2628)", - "check_sample_weight_equivalence": ( - "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " - "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " - "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " - "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." 
- ), + "check_sample_weight_equivalence": check_sample_weight_str, + "check_sample_weight_equivalence_on_dense_data": check_sample_weight_str, + "check_sample_weight_equivalence_on_sparse_data": check_sample_weight_str, }, } @@ -703,7 +709,6 @@ def _update_sklearn_tags_from_dict( tags.input_tags.allow_nan = tags_dict["allow_nan"] tags.input_tags.sparse = "sparse" in tags_dict["X_types"] tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"] - tags._xfail_checks = tags_dict["_xfail_checks"] return tags def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: @@ -719,7 +724,7 @@ def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: # take whatever tags are provided by BaseEstimator, then modify # them with LightGBM-specific values return self._update_sklearn_tags_from_dict( - tags=_LGBMModelBase.__sklearn_tags__(self), + tags=super().__sklearn_tags__(), tags_dict=self._more_tags(), ) @@ -1291,7 +1296,7 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + return super().__sklearn_tags__() def fit( # type: ignore[override] self, @@ -1350,7 +1355,10 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + tags = super().__sklearn_tags__() + tags.classifier_tags.multi_class = True + tags.classifier_tags.multi_label = False + return tags def fit( # type: ignore[override] self, diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 19866e01202b..8fcc85814db5 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -84,17 +84,6 @@ minimum-version = "build-system.requires" # end:build-system -[tool.isort] -include_trailing_comma = true -line_length = 120 -# "vertical hanging indent", to match what ruff-format does -# ref: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html#3-vertical-hanging-indent -multi_line_output = 3 -skip_glob = [ - "*/external_libs/*", - "*/lightgbm-python/*", -] - [tool.mypy] disallow_untyped_defs = true exclude = 'build/*|compile/*|docs/*|examples/*|external_libs/*|lightgbm-python/*|tests/*' @@ -140,7 +129,7 @@ ignore = [ "PLR1714", # (pylint) Magic value used in comparison "PLR2004", - # (pylint) for loop veriable overwritten by assignment target + # (pylint) for loop variable overwritten by assignment target "PLW2901", # (pylint) use 'elif' instead of 'else' then 'if', to reduce indentation "PLR5501" @@ -152,10 +141,12 @@ select = [ "C4", # pydocstyle "D", - # pycodestyle + # pycodestyle (errors) "E", # pyflakes "F", + # isort + "I", # NumPy-specific rules "NPY", # pylint @@ -166,11 +157,13 @@ select = [ "SIM401", # flake8-print "T", + # pycodestyle (warnings) + "W", ] [tool.ruff.lint.per-file-ignores] "docs/conf.py" = [ - # (flake8-bugbear) raise exceptions with "raise ... from errr" + # (flake8-bugbear) raise exceptions with "raise ... 
from err" "B904", # (flake8-print) flake8-print "T" @@ -196,3 +189,6 @@ select = [ [tool.ruff.lint.pydocstyle] convention = "numpy" + +[tool.ruff.lint.isort] +known-first-party = ["lightgbm"] diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp index 7a66b5696425..451384e6850a 100644 --- a/src/boosting/bagging.hpp +++ b/src/boosting/bagging.hpp @@ -73,17 +73,17 @@ class BaggingSampleStrategy : public SampleStrategy { for (data_size_t i = start_index + 1; i < end_index; ++i) { sampled_query_boundaries_[i] += sampled_query_boundaries_[i - 1]; } - sampled_query_boundaires_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; }); for (int thread_index = 1; thread_index < num_blocks; ++thread_index) { - sampled_query_boundaires_thread_buffer_[thread_index] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } Threading::For(0, num_sampled_queries_ + 1, 128, [this](int thread_index, data_size_t start_index, data_size_t end_index) { if (thread_index > 0) { for (data_size_t i = start_index; i < end_index; ++i) { - sampled_query_boundaries_[i] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_[i] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } } }); @@ -171,7 +171,7 @@ class BaggingSampleStrategy : public SampleStrategy { } else { bagging_runner_.ReSize(num_queries_); sampled_query_boundaries_.resize(num_queries_ + 1, 0); - sampled_query_boundaires_thread_buffer_.resize(num_threads_, 0); + sampled_query_boundaries_thread_buffer_.resize(num_threads_, 0); bag_query_indices_.resize(num_data_); } bagging_rands_.clear(); @@ -280,7 +280,7 @@ class BaggingSampleStrategy : public SampleStrategy { /*! \brief query boundaries of the in-bag queries */ std::vector sampled_query_boundaries_; /*! \brief buffer for calculating sampled_query_boundaries_ */ - std::vector sampled_query_boundaires_thread_buffer_; + std::vector sampled_query_boundaries_thread_buffer_; /*! \brief in-bag query indices */ std::vector> bag_query_indices_; /*! 
\brief number of queries in the training dataset */ diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index d0d7f87e2962..a82706e63f1d 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -547,17 +547,17 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { } } else { std::vector tree_sizes = CommonC::StringToArray(key_vals["tree_sizes"].c_str(), ' '); - std::vector tree_boundries(tree_sizes.size() + 1, 0); + std::vector tree_boundaries(tree_sizes.size() + 1, 0); int num_trees = static_cast(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { - tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i]; + tree_boundaries[i + 1] = tree_boundaries[i] + tree_sizes[i]; models_.emplace_back(nullptr); } OMP_INIT_EX(); #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_trees; ++i) { OMP_LOOP_EX_BEGIN(); - auto cur_p = p + tree_boundries[i]; + auto cur_p = p + tree_boundaries[i]; auto line_len = Common::GetLine(cur_p); std::string cur_line(cur_p, line_len); if (Common::StartsWith(cur_line, "Tree=")) { diff --git a/src/c_api.cpp b/src/c_api.cpp index 4e118cf489a4..7d05121292ce 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/src/io/cuda/cuda_column_data.cpp b/src/io/cuda/cuda_column_data.cpp index 7fe2238defa0..6fcdb1eb0692 100644 --- a/src/io/cuda/cuda_column_data.cpp +++ b/src/io/cuda/cuda_column_data.cpp @@ -7,6 +7,8 @@ #include +#include + namespace LightGBM { CUDAColumnData::CUDAColumnData(const data_size_t num_data, const int gpu_device_id) { diff --git a/src/io/json11.cpp b/src/io/json11.cpp index 32a9c9d718b7..acd09f9ecb12 100644 --- a/src/io/json11.cpp +++ b/src/io/json11.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index 14ea8cb6c4f2..b9c23c144c64 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -225,7 +225,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector(buffer); int in_rank = *ptr_in_rank; + if (in_rank < 0 || in_rank >= num_machines_) { + Log::Fatal("Invalid rank %d found during initialization of linkers. 
The world size is %d.", in_rank, num_machines_); + } // add new socket SetLinker(in_rank, handler); ++connected_cnt; diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index dc97f20a11b8..8e866f842da8 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -46,7 +46,7 @@ class RankingObjective : public ObjectiveFunction { position_ids_ = metadata.position_ids(); // get number of different position ids num_position_ids_ = static_cast(metadata.num_position_ids()); - // get boundries + // get boundaries query_boundaries_ = metadata.query_boundaries(); if (query_boundaries_ == nullptr) { Log::Fatal("Ranking tasks require query information"); @@ -204,7 +204,7 @@ class LambdarankNDCG : public RankingObjective { } const double worst_score = score[sorted_idx[worst_idx]]; double sum_lambdas = 0.0; - // start accmulate lambdas by pairs that contain at least one document above truncation level + // start accumulate lambdas by pairs that contain at least one document above truncation level for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) { if (score[sorted_idx[i]] == kMinScore) { continue; } for (data_size_t j = i + 1; j < cnt; ++j) { diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp index d9dd904b030a..09dbac3a94ba 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.cpp +++ b/src/treelearner/cuda/cuda_best_split_finder.cpp @@ -111,7 +111,7 @@ void CUDABestSplitFinder::Init() { void CUDABestSplitFinder::InitCUDAFeatureMetaInfo() { cuda_is_feature_used_bytree_.Resize(static_cast(num_features_)); - // intialize split find task information (a split find task is one pass through the histogram of a feature) + // initialize split find task information (a split find task is one pass through the histogram of a feature) num_tasks_ = 0; for (int inner_feature_index = 0; inner_feature_index < num_features_; ++inner_feature_index) { const uint32_t num_bin = feature_num_bins_[inner_feature_index]; diff --git a/src/treelearner/cuda/cuda_data_partition.cu b/src/treelearner/cuda/cuda_data_partition.cu index 20535a2b76b0..75ebd33eb339 100644 --- a/src/treelearner/cuda/cuda_data_partition.cu +++ b/src/treelearner/cuda/cuda_data_partition.cu @@ -262,7 +262,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( } } -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -286,7 +286,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( template __global__ void GenDataToLeftBitVectorKernel( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, uint16_t* block_to_left_offset, data_size_t* block_to_left_offset_buffer, data_size_t* block_to_right_offset_buffer) { @@ -335,7 +335,7 @@ __global__ void GenDataToLeftBitVectorKernel( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -363,7 +363,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -380,7 
+380,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -396,7 +396,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const bool max_bin_to_left, const bool is_single_feature_in_column) { @@ -413,7 +413,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column) { if (!max_bin_to_left) { @@ -429,7 +429,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column) { if (!is_single_feature_in_column) { GenDataToLeftBitVectorKernel @@ -548,7 +548,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernel( #undef UpdateDataIndexToLeafIndexKernel_PARAMS #undef UpdateDataIndexToLeafIndex_ARGS -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #undef GenBitVector_ARGS template diff --git a/src/treelearner/cuda/cuda_data_partition.hpp b/src/treelearner/cuda/cuda_data_partition.hpp index 67931e71715d..1c31805970d3 100644 --- a/src/treelearner/cuda/cuda_data_partition.hpp +++ b/src/treelearner/cuda/cuda_data_partition.hpp @@ -181,7 +181,7 @@ class CUDADataPartition: public NCCLInfo { const int left_leaf_index, const int right_leaf_index); -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -194,7 +194,7 @@ class CUDADataPartition: public NCCLInfo { template void LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -204,7 +204,7 @@ class CUDADataPartition: public NCCLInfo { template void LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -213,7 +213,7 @@ class CUDADataPartition: public NCCLInfo { template void LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -221,23 +221,23 @@ class CUDADataPartition: public NCCLInfo { template void LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner4( - 
GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column); -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #define UpdateDataIndexToLeafIndexKernel_PARAMS \ const BIN_TYPE* column_data, \ @@ -388,7 +388,7 @@ class CUDADataPartition: public NCCLInfo { CUDAVector cuda_split_info_buffer_; // dataset information - /*! \brief number of data in training set, for intialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ + /*! \brief number of data in training set, for initialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ CUDAVector cuda_num_data_; diff --git a/src/treelearner/cuda/cuda_histogram_constructor.cpp b/src/treelearner/cuda/cuda_histogram_constructor.cpp index 74fbb2c099aa..6eb53d4ad4da 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.cpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.cpp @@ -152,7 +152,7 @@ void CUDAHistogramConstructor::CalcConstructHistogramKernelDim( int* block_dim_y, const data_size_t num_data_in_smaller_leaf) { *block_dim_x = cuda_row_data_->max_num_column_per_partition(); - *block_dim_y = NUM_THRADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); + *block_dim_y = NUM_THREADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); *grid_dim_x = cuda_row_data_->num_feature_partitions(); *grid_dim_y = std::max(min_grid_dim_y_, ((num_data_in_smaller_leaf + NUM_DATA_PER_THREAD - 1) / NUM_DATA_PER_THREAD + (*block_dim_y) - 1) / (*block_dim_y)); diff --git a/src/treelearner/cuda/cuda_histogram_constructor.hpp b/src/treelearner/cuda/cuda_histogram_constructor.hpp index 9a590c9461b9..1768ee507370 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.hpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.hpp @@ -19,7 +19,7 @@ #include "cuda_leaf_splits.hpp" #define NUM_DATA_PER_THREAD (400) -#define NUM_THRADS_PER_BLOCK (504) +#define NUM_THREADS_PER_BLOCK (504) #define NUM_FEATURE_PER_THREAD_GROUP (28) #define SUBTRACT_BLOCK_SIZE (1024) #define FIX_HISTOGRAM_SHARED_MEM_SIZE (1024) diff --git a/src/treelearner/cuda/cuda_leaf_splits.cpp b/src/treelearner/cuda/cuda_leaf_splits.cpp index 5530a52789c8..2bdd0d47fae1 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cpp +++ b/src/treelearner/cuda/cuda_leaf_splits.cpp @@ -16,7 +16,7 @@ num_data_(num_data) {} CUDALeafSplits::~CUDALeafSplits() {} void CUDALeafSplits::Init(const bool use_quantized_grad) { - num_blocks_init_from_gradients_ = (num_data_ + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data_ + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; // allocate more memory for sum reduction in CUDA // only the first element records the final sum @@ -67,7 +67,7 @@ void CUDALeafSplits::InitValues( void CUDALeafSplits::Resize(const data_size_t num_data) { num_data_ = num_data; - num_blocks_init_from_gradients_ = (num_data + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; cuda_sum_of_gradients_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_gradients_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); diff --git a/src/treelearner/cuda/cuda_leaf_splits.cu b/src/treelearner/cuda/cuda_leaf_splits.cu index 
2f17eb64163d..921a5478ea15 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cu +++ b/src/treelearner/cuda/cuda_leaf_splits.cu @@ -317,18 +317,18 @@ void CUDALeafSplits::LaunchInitValuesKernel( const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } else { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } SynchronizeCUDADevice(__FILE__, __LINE__); if (nccl_communicator_ != nullptr) { - ReduceGradKernel<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), + ReduceGradKernel<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), num_used_indices); SynchronizeCUDADevice(__FILE__, __LINE__); cudaStream_t cuda_stream = CUDAStreamCreate(); @@ -342,7 +342,7 @@ void CUDALeafSplits::LaunchInitValuesKernel( cuda_sum_of_hessians_buffer_.RawData(), num_used_indices, cuda_data_indices_in_leaf, cuda_hist_in_leaf, cuda_struct_.RawData()); } else { - CUDAInitValuesKernel2<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel2<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), @@ -364,11 +364,11 @@ void CUDALeafSplits::LaunchInitValuesKernel( const score_t* grad_scale, const score_t* hess_scale) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } else { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } @@ -376,7 +376,7 @@ void CUDALeafSplits::LaunchInitValuesKernel( SynchronizeCUDADevice(__FILE__, __LINE__); if (nccl_communicator_ != nullptr) { - ReduceGradKernel<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(num_blocks_init_from_gradients_, + ReduceGradKernel<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), num_used_indices); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -393,7 +393,7 @@ void CUDALeafSplits::LaunchInitValuesKernel( cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), num_used_indices, cuda_data_indices_in_leaf, cuda_hist_in_leaf, cuda_struct_.RawData()); } else { - CUDAInitValuesKernel4<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel4<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), diff --git a/src/treelearner/cuda/cuda_leaf_splits.hpp b/src/treelearner/cuda/cuda_leaf_splits.hpp index 0fc372958c3a..eb5ace663e74 
100644 --- a/src/treelearner/cuda/cuda_leaf_splits.hpp +++ b/src/treelearner/cuda/cuda_leaf_splits.hpp @@ -13,7 +13,7 @@ #include #include -#define NUM_THRADS_PER_BLOCK_LEAF_SPLITS (1024) +#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024) #define NUM_DATA_THREAD_ADD_LEAF_SPLITS (6) namespace LightGBM { diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 64c342e5b01d..670788118455 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -260,12 +260,12 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { if (smaller_leaf_num_bits <= 16) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_int16_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt16(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histogram()); } else { if (local_smaller_leaf_num_bits == 32) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt32(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histogram()); } else { this->smaller_leaf_histogram_array_[feature_index].CopyFromInt16ToInt32( input_buffer_.data() + buffer_write_start_pos_[feature_index]); @@ -274,7 +274,7 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { } else { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawData(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfHistgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfHistogram()); } } global_timer.Stop("DataParallelTreeLearner::ReduceHistogram::Copy"); diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 70dd0fb5436f..2d4abbd27af1 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -668,15 +668,15 @@ class FeatureHistogram { /*! * \brief Binary size of this histogram */ - int SizeOfHistgram() const { + int SizeOfHistogram() const { return (meta_->num_bin - meta_->offset) * kHistEntrySize; } - int SizeOfInt32Histgram() const { + int SizeOfInt32Histogram() const { return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize; } - int SizeOfInt16Histgram() const { + int SizeOfInt16Histogram() const { return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize; } diff --git a/src/treelearner/gpu_tree_learner.cpp b/src/treelearner/gpu_tree_learner.cpp index 7c6c811c3b45..1bf21d65ccc6 100644 --- a/src/treelearner/gpu_tree_learner.cpp +++ b/src/treelearner/gpu_tree_learner.cpp @@ -777,7 +777,7 @@ void GPUTreeLearner::ResetIsConstantHessian(bool is_constant_hessian) { void GPUTreeLearner::BeforeTrain() { #if GPU_DEBUG >= 2 - printf("Copying intial full gradients and hessians to device\n"); + printf("Copying initial full gradients and hessians to device\n"); #endif // Copy initial full hessians and gradients to GPU. // We start copying as early as possible, instead of at ConstructHistogram(). 
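The NUM_THREADS_PER_BLOCK_LEAF_SPLITS rename above leaves the launch geometry untouched: the block count is still the ceiling division of the data count by the threads per block, i.e. the smallest number of blocks whose combined threads reach every data point. The same arithmetic in plain Python, as a quick sanity check with an illustrative num_data (the 1024 matches the macro definition in the patch):

num_data = 100_000
threads_per_block = 1024  # NUM_THREADS_PER_BLOCK_LEAF_SPLITS

# ceiling division: smallest count with num_blocks * threads_per_block >= num_data
num_blocks = (num_data + threads_per_block - 1) // threads_per_block
print(num_blocks)  # 98, since 97 * 1024 = 99328 < 100000 <= 98 * 1024 = 100352
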
diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu index d778d650f722..9d8427a6f9a8 100644 --- a/src/treelearner/kernels/histogram_16_64_256.cu +++ b/src/treelearner/kernels/histogram_16_64_256.cu @@ -150,7 +150,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // size of threads that process this feature4 const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups); - // equavalent thread ID in this subgroup for this feature4 + // equivalent thread ID in this subgroup for this feature4 const unsigned int subglobal_tid = gtid - feature_id * subglobal_size; @@ -508,7 +508,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4 for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) { // prefetch the next iteration variables - // we don't need bondary check because we have made the buffer large + // we don't need boundary check because we have made the buffer large int i_next = i + subglobal_size; #ifdef IGNORE_INDICES // we need to check to bounds here @@ -752,7 +752,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary?? // total size: 2 * 256 * size_of(float) = 2 KB // organization: each feature/grad/hessian is at a different bank, - // as indepedent of the feature value as possible + // as independent of the feature value as possible acc_type *gh_hist = reinterpret_cast(shared_array); // counter histogram diff --git a/src/treelearner/ocl/histogram16.cl b/src/treelearner/ocl/histogram16.cl index 21624ec9ee10..be590c20666b 100644 --- a/src/treelearner/ocl/histogram16.cl +++ b/src/treelearner/ocl/histogram16.cl @@ -8,7 +8,7 @@ #ifndef __OPENCL_VERSION__ // If we are including this file in C++, // the entire source file following (except the last #endif) will become -// a raw string literal. The extra ")" is just for mathcing parentheses +// a raw string literal. The extra ")" is just for matching parentheses // to make the editor happy. The extra ")" and extra endif will be skipped. // DO NOT add anything between here and the next #ifdef, otherwise you need // to modify the skip count at the end of this file. 
@@ -475,7 +475,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/ocl/histogram256.cl b/src/treelearner/ocl/histogram256.cl index 3351f9efa7c3..b5c049e1272d 100644 --- a/src/treelearner/ocl/histogram256.cl +++ b/src/treelearner/ocl/histogram256.cl @@ -387,7 +387,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base, const uint subglobal_tid = gtid - group_feature * subglobal_size; // extract feature mask, when a byte is set to 0, that feature is disabled #if ENABLE_ALL_FEATURES == 1 - // hopefully the compiler will propogate the constants and eliminate all branches + // hopefully the compiler will propagate the constants and eliminate all branches uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff); #else uchar4 feature_mask = feature_masks[group_feature]; diff --git a/src/treelearner/ocl/histogram64.cl b/src/treelearner/ocl/histogram64.cl index 48fa8c506d8b..4ec4d6371df5 100644 --- a/src/treelearner/ocl/histogram64.cl +++ b/src/treelearner/ocl/histogram64.cl @@ -454,7 +454,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/parallel_tree_learner.h b/src/treelearner/parallel_tree_learner.h index b942dceab28b..aff8ac0fd4c5 100644 --- a/src/treelearner/parallel_tree_learner.h +++ b/src/treelearner/parallel_tree_learner.h @@ -148,12 +148,12 @@ class VotingParallelTreeLearner: public TREELEARNER_T { * \brief Perform global voting * \param leaf_idx index of leaf * \param splits All splits from local voting - * \param out Result of gobal voting, only store feature indices + * \param out Result of global voting, only store feature indices */ void GlobalVoting(int leaf_idx, const std::vector& splits, std::vector* out); /*! - * \brief Copy local histgram to buffer + * \brief Copy local histogram to buffer * \param smaller_top_features Selected features for smaller leaf * \param larger_top_features Selected features for larger leaf */ @@ -183,9 +183,9 @@ class VotingParallelTreeLearner: public TREELEARNER_T { std::vector block_start_; /*! \brief Block size for reduce scatter */ std::vector block_len_; - /*! \brief Read positions for feature histgrams at smaller leaf */ + /*! \brief Read positions for feature histograms at smaller leaf */ std::vector smaller_buffer_read_start_pos_; - /*! \brief Read positions for feature histgrams at larger leaf */ + /*! \brief Read positions for feature histograms at larger leaf */ std::vector larger_buffer_read_start_pos_; /*! 
\brief Size for reduce scatter */ comm_size_t reduce_scatter_size_; diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 14ede072dc9e..01cdd7623c02 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -735,24 +735,24 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf, std::set SerialTreeLearner::FindAllForceFeatures(Json force_split_leaf_setting) { std::set force_features; - std::queue force_split_leafs; + std::queue force_split_leaves; - force_split_leafs.push(force_split_leaf_setting); + force_split_leaves.push(force_split_leaf_setting); - while (!force_split_leafs.empty()) { - Json split_leaf = force_split_leafs.front(); - force_split_leafs.pop(); + while (!force_split_leaves.empty()) { + Json split_leaf = force_split_leaves.front(); + force_split_leaves.pop(); const int feature_index = split_leaf["feature"].int_value(); const int feature_inner_index = train_data_->InnerFeatureIndex(feature_index); force_features.insert(feature_inner_index); if (split_leaf.object_items().count("left") > 0) { - force_split_leafs.push(split_leaf["left"]); + force_split_leaves.push(split_leaf["left"]); } if (split_leaf.object_items().count("right") > 0) { - force_split_leafs.push(split_leaf["right"]); + force_split_leaves.push(split_leaf["right"]); } } diff --git a/src/treelearner/split_info.hpp b/src/treelearner/split_info.hpp index 234105eb9a34..8d33a6a76854 100644 --- a/src/treelearner/split_info.hpp +++ b/src/treelearner/split_info.hpp @@ -53,7 +53,7 @@ struct SplitInfo { bool default_left = true; int8_t monotone_type = 0; inline static int Size(int max_cat_threshold) { - return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t); + return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t) + sizeof(int64_t)*2; } inline void CopyTo(char* buffer) const { diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index b88db5a7ba28..37f2d4cf2641 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -207,9 +207,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec smaller_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++smaller_idx; } if (cur_used_features >= cur_total_feature) { @@ -225,9 +225,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec larger_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - 
std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++larger_idx; } } diff --git a/tests/cpp_tests/test_chunked_array.cpp b/tests/cpp_tests/test_chunked_array.cpp index 9bfd857299ab..bc58918082a8 100644 --- a/tests/cpp_tests/test_chunked_array.cpp +++ b/tests/cpp_tests/test_chunked_array.cpp @@ -217,8 +217,8 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { // Number of trials for each new ChunkedArray configuration. Pass 100 times over the search space: const size_t N_TRIALS = MAX_CHUNKS_SEARCH * MAX_IN_CHUNK_SEARCH_IDX * 100; const int INVALID = -1; // A negative value signaling the requested value lives in an invalid address. - const int UNITIALIZED = -99; // A negative value to signal this was never updated. - std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); // Memorize latest inserted values. + const int UNINITIALIZED = -99; // A negative value to signal this was never updated. + std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); // Memorize latest inserted values. // Each outer loop iteration changes the test by adding +1 chunk. We start with 1 chunk only: for (size_t chunks = 1; chunks < MAX_CHUNKS_SEARCH; ++chunks) { @@ -249,10 +249,10 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { } // Final check: ensure even with overrides, all valid insertions store the latest value at that address: - std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); + std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); ca_.coalesce_to(coalesced_out.data(), true); // Export all valid addresses. 
for (size_t i = 0; i < ref_values.size(); ++i) { - if (ref_values[i] != UNITIALIZED) { + if (ref_values[i] != UNINITIALIZED) { // Test in 2 ways that the values are correctly laid out in memory: EXPECT_EQ(ca_.getitem(i / CHUNK_SIZE, i % CHUNK_SIZE, INVALID), ref_values[i]); EXPECT_EQ(coalesced_out[i], ref_values[i]); diff --git a/tests/cpp_tests/test_stream.cpp b/tests/cpp_tests/test_stream.cpp index bc5f73b0a3ee..a656af1e2fe9 100644 --- a/tests/cpp_tests/test_stream.cpp +++ b/tests/cpp_tests/test_stream.cpp @@ -17,7 +17,7 @@ using LightGBM::TestUtils; void test_stream_dense( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -86,7 +86,7 @@ void test_stream_dense( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows dense data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -131,7 +131,7 @@ void test_stream_dense( void test_stream_sparse( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -203,7 +203,7 @@ void test_stream_sparse( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows sparse data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -249,13 +249,13 @@ void test_stream_sparse( TEST(Stream, PushDenseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -266,9 +266,9 @@ TEST(Stream, PushDenseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make 
some testable Datasets with N rows each @@ -290,23 +290,23 @@ TEST(Stream, PushDenseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_dense(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); + test_stream_dense(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } TEST(Stream, PushSparseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -317,9 +317,9 @@ TEST(Stream, PushSparseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -344,10 +344,10 @@ TEST(Stream, PushSparseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_sparse(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); + test_stream_sparse(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 0dfe3e47fa11..bdd4d3f58b80 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -947,3 +947,39 @@ def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(c "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity." 
) assert expected_warning in capsys.readouterr().out + + +@pytest.mark.parametrize("order", ["C", "F"]) +@pytest.mark.parametrize("dtype", ["float32", "int64"]) +def test_no_copy_in_dataset_from_numpy_2d(rng, order, dtype): + X = rng.random(size=(100, 3)) + X = np.require(X, dtype=dtype, requirements=order) + X1d, layout = lgb.basic._np2d_to_np1d(X) + if order == "F": + assert layout == lgb.basic._C_API_IS_COL_MAJOR + else: + assert layout == lgb.basic._C_API_IS_ROW_MAJOR + if dtype == "float32": + assert np.shares_memory(X, X1d) + else: + # makes a copy + assert not np.shares_memory(X, X1d) + + +def test_equal_datasets_from_row_major_and_col_major_data(tmp_path): + # row-major dataset + X_row, y = make_blobs(n_samples=1_000, n_features=3, centers=2) + assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"] + ds_row = lgb.Dataset(X_row, y) + ds_row_path = tmp_path / "ds_row.txt" + ds_row._dump_text(ds_row_path) + + # col-major dataset + X_col = np.asfortranarray(X_row) + assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"] + ds_col = lgb.Dataset(X_col, y) + ds_col_path = tmp_path / "ds_col.txt" + ds_col._dump_text(ds_col_path) + + # check datasets are equal + assert filecmp.cmp(ds_row_path, ds_col_path) diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 2eeba46f2869..b5e17991f63d 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -471,7 +471,7 @@ def test_classifier_custom_objective(output, task, cluster): assert_eq(p1_proba, p1_proba_local) -def test_machines_to_worker_map_unparseable_host_names(): +def test_machines_to_worker_map_unparsable_host_names(): workers = {"0.0.0.1:80": {}, "0.0.0.2:80": {}} machines = "0.0.0.1:80,0.0.0.2:80" with pytest.raises(ValueError, match="Could not parse host name from worker address '0.0.0.1:80'"): diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 7ac922375cb3..8ed34724a2ed 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -660,7 +660,7 @@ def test_ranking_prediction_early_stopping(): # Simulates position bias for a given ranking dataset. -# The ouput dataset is identical to the input one with the exception for the relevance labels. +# The output dataset is identical to the input one with the exception for the relevance labels. 
# The new labels are generated according to an instance of a cascade user model: # for each query, the user is simulated to be traversing the list of documents ranked by a baseline ranker # (in our example it is simply the ordering by some feature correlated with relevance, e.g., 34) @@ -1459,7 +1459,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): ] ) y = rng.uniform(size=(100,)) - ds = lgb.Dataset(X, y) + ds = lgb.Dataset(X, y, categorical_feature=[1, 2]) params = { "bagging_fraction": 0.8, "bagging_freq": 2, @@ -1474,7 +1474,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): "verbosity": 0, } model_file = tmp_path / "model.txt" - orig_bst = lgb.train(params, ds, num_boost_round=1, categorical_feature=[1, 2]) + orig_bst = lgb.train(params, ds, num_boost_round=1) orig_bst.save_model(model_file) with model_file.open("rt") as f: model_contents = f.readlines() @@ -1746,16 +1746,18 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): gbm0 = lgb.train(params, lgb_train, num_boost_round=10) pred0 = gbm0.predict(X_test) assert lgb_train.categorical_feature == "auto" - lgb_train = lgb.Dataset(X, pd.DataFrame(y)) # also test that label can be one-column pd.DataFrame - gbm1 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[0]) + lgb_train = lgb.Dataset( + X, pd.DataFrame(y), categorical_feature=[0] + ) # also test that label can be one-column pd.DataFrame + gbm1 = lgb.train(params, lgb_train, num_boost_round=10) pred1 = gbm1.predict(X_test) assert lgb_train.categorical_feature == [0] - lgb_train = lgb.Dataset(X, pd.Series(y)) # also test that label can be pd.Series - gbm2 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A"]) + lgb_train = lgb.Dataset(X, pd.Series(y), categorical_feature=["A"]) # also test that label can be pd.Series + gbm2 = lgb.train(params, lgb_train, num_boost_round=10) pred2 = gbm2.predict(X_test) assert lgb_train.categorical_feature == ["A"] - lgb_train = lgb.Dataset(X, y) - gbm3 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D"]) + lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D"]) + gbm3 = lgb.train(params, lgb_train, num_boost_round=10) pred3 = gbm3.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D"] categorical_model_path = tmp_path / "categorical.model" @@ -1767,12 +1769,12 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): pred5 = gbm4.predict(X_test) gbm5 = lgb.Booster(model_str=model_str) pred6 = gbm5.predict(X_test) - lgb_train = lgb.Dataset(X, y) - gbm6 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D", "E"]) + lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D", "E"]) + gbm6 = lgb.train(params, lgb_train, num_boost_round=10) pred7 = gbm6.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D", "E"] - lgb_train = lgb.Dataset(X, y) - gbm7 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[]) + lgb_train = lgb.Dataset(X, y, categorical_feature=[]) + gbm7 = lgb.train(params, lgb_train, num_boost_round=10) pred8 = gbm7.predict(X_test) assert lgb_train.categorical_feature == [] with pytest.raises(AssertionError): @@ -3672,12 +3674,11 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test with a categorical feature x[:250, 0] = 0 y[:250] += 10 - lgb_train = lgb.Dataset(x, label=y) + lgb_train = lgb.Dataset(x, label=y, categorical_feature=[0]) est = lgb.train( dict(params, 
linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train, num_boost_round=10, - categorical_feature=[0], ) # test refit: same results on same data est2 = est.refit(x, label=y) @@ -3700,10 +3701,20 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test when num_leaves - 1 < num_features and when num_leaves - 1 > num_features X_train, _, y_train, _ = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) params = {"linear_tree": True, "verbose": -1, "metric": "mse", "seed": 0} - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=2)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=60)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=2), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=60), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) def test_save_and_load_linear(tmp_path): @@ -3714,8 +3725,8 @@ def test_save_and_load_linear(tmp_path): X_train[: X_train.shape[0] // 2, 0] = 0 y_train[: X_train.shape[0] // 2] = 1 params = {"linear_tree": True} - train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) - est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params, categorical_feature=[0]) + est_1 = lgb.train(params, train_data_1, num_boost_round=10) pred_1 = est_1.predict(X_train) tmp_dataset = str(tmp_path / "temp_dataset.bin") @@ -4600,3 +4611,18 @@ def test_bagging_by_query_in_lambdarank(): ndcg_score_no_bagging_by_query = gbm_no_bagging_by_query.best_score["valid_0"]["ndcg@5"] assert ndcg_score_bagging_by_query >= ndcg_score - 0.1 assert ndcg_score_no_bagging_by_query >= ndcg_score - 0.1 + + +def test_equal_predict_from_row_major_and_col_major_data(): + X_row, y = make_synthetic_regression() + assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"] + ds = lgb.Dataset(X_row, y) + params = {"num_leaves": 8, "verbose": -1} + bst = lgb.train(params, ds, num_boost_round=5) + preds_row = bst.predict(X_row) + + X_col = np.asfortranarray(X_row) + assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"] + preds_col = bst.predict(X_col) + + np.testing.assert_allclose(preds_row, preds_col) diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 6eca66ff20d3..1cdd047f1857 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -17,11 +17,18 @@ from sklearn.metrics import accuracy_score, log_loss, mean_squared_error, r2_score from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split from sklearn.multioutput import ClassifierChain, MultiOutputClassifier, MultiOutputRegressor, RegressorChain -from sklearn.utils.estimator_checks import parametrize_with_checks +from sklearn.utils.estimator_checks import parametrize_with_checks as sklearn_parametrize_with_checks from sklearn.utils.validation import check_is_fitted import lightgbm as lgb -from lightgbm.compat import DATATABLE_INSTALLED, PANDAS_INSTALLED, dt_DataTable, pd_DataFrame, pd_Series +from lightgbm.compat import ( + 
DATATABLE_INSTALLED, + PANDAS_INSTALLED, + _sklearn_version, + dt_DataTable, + pd_DataFrame, + pd_Series, +) from .utils import ( assert_silent, @@ -35,6 +42,9 @@ softmax, ) +SKLEARN_MAJOR, SKLEARN_MINOR, *_ = _sklearn_version.split(".") +SKLEARN_VERSION_GTE_1_6 = (int(SKLEARN_MAJOR), int(SKLEARN_MINOR)) >= (1, 6) + decreasing_generator = itertools.count(0, -1) estimator_classes = (lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker) task_to_model_factory = { @@ -1432,7 +1442,28 @@ def test_getting_feature_names_in_pd_input(estimator_class): np.testing.assert_array_equal(model.feature_names_in_, X.columns) -@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()]) +# Starting with scikit-learn 1.6 (https://github.com/scikit-learn/scikit-learn/pull/30149), +# the only API for marking estimator tests as expected to fail is to pass a keyword argument +# to parametrize_with_checks(). That function didn't accept additional arguments in earlier +# versions. +# +# This block defines a patched version of parametrize_with_checks() so lightgbm's tests +# can be compatible with scikit-learn <1.6 and >=1.6. +# +# This should be removed once minimum supported scikit-learn version is at least 1.6. +if SKLEARN_VERSION_GTE_1_6: + parametrize_with_checks = sklearn_parametrize_with_checks +else: + + def parametrize_with_checks(estimator, *args, **kwargs): + return sklearn_parametrize_with_checks(estimator) + + +def _get_expected_failed_tests(estimator): + return estimator._more_tags()["_xfail_checks"] + + +@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()], expected_failed_checks=_get_expected_failed_tests) def test_sklearn_integration(estimator, check): estimator.set_params(min_child_samples=1, min_data_in_bin=1) check(estimator) @@ -1457,7 +1488,12 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True - assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"] + if estimator_class is lgb.LGBMClassifier: + assert sklearn_tags.estimator_type == "classifier" + assert sklearn_tags.classifier_tags.multi_class is True + assert sklearn_tags.classifier_tags.multi_label is False + elif estimator_class is lgb.LGBMRegressor: + assert sklearn_tags.estimator_type == "regressor" @pytest.mark.parametrize("task", all_tasks)
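For context on the tag assertions above: from scikit-learn 1.6 onward an estimator reports its capabilities through __sklearn_tags__(), which returns a structured tags object (input_tags, classifier_tags, and so on) rather than the older _more_tags() dict. A rough interactive sketch of inspecting the same fields the test checks, assuming scikit-learn >= 1.6 and a lightgbm build that includes these changes:

import lightgbm as lgb

# Unfitted estimators are fine here; tags describe capabilities, not fitted state.
clf_tags = lgb.LGBMClassifier().__sklearn_tags__()
print(clf_tags.estimator_type)               # "classifier"
print(clf_tags.input_tags.allow_nan)         # True
print(clf_tags.input_tags.sparse)            # True
print(clf_tags.classifier_tags.multi_class)  # True

reg_tags = lgb.LGBMRegressor().__sklearn_tags__()
print(reg_tags.estimator_type)               # "regressor"

The expected_failed_checks argument passed to parametrize_with_checks() above serves a related purpose: on scikit-learn >= 1.6 it replaces the old "_xfail_checks" tag as the way to tell the estimator checks which ones are known to fail, which is why the compatibility shim simply drops it on older versions.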