From 81ceaec08d7dac36fa5067241bf67ef111c36953 Mon Sep 17 00:00:00 2001
From: Jirair Aroyan <165020043+JAroyan@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:59:32 +0200
Subject: [PATCH 01/27] [docs] Remove only cpu note due to gpu support for linear trees (#6686)

* Remove only cpu note

* Change Note for linear tree

---------

Co-authored-by: Nikita Titov
Co-authored-by: James Lamb
---
 docs/Parameters.rst       | 2 +-
 include/LightGBM/config.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index e5cfaf2dc560..1f80a13d5731 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -795,7 +795,7 @@ Dataset Parameters
 
   - it is recommended to rescale data before training so that features have similar mean and standard deviation
 
-  - **Note**: works only with ``cpu`` device type and ``serial`` tree learner
+  - **Note**: works only with ``cpu`` and ``gpu`` device types and ``serial`` tree learner
 
   - **Note**: ``regression_l1`` objective is not supported with linear tree boosting
 
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 4a73c30f9001..d5b56f0fd1fb 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -664,7 +664,7 @@ struct Config {
   // desc = categorical features are used for splits as normal but are not used in the linear models
   // desc = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
   // desc = it is recommended to rescale data before training so that features have similar mean and standard deviation
-  // desc = **Note**: works only with ``cpu`` device type and ``serial`` tree learner
+  // desc = **Note**: works only with ``cpu`` and ``gpu`` device types and ``serial`` tree learner
   // desc = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
   // desc = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
   // desc = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves

From c9d1ac7beac4426c8e636a392bde0f995d1ae8fb Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 26 Oct 2024 12:31:39 -0500
Subject: [PATCH 02/27] [python-package] remove MSVS solution files from sdist (#6698)

* [python-package] remove MSVS solution files from sdist

* remove one more line

---------

Co-authored-by: Nikita Titov
---
 build-python.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build-python.sh b/build-python.sh
index cf790737729e..ff37e4afe225 100755
--- a/build-python.sh
+++ b/build-python.sh
@@ -205,7 +205,6 @@ create_isolated_source_dir() {
     cp -R ./include ./lightgbm-python
     cp -R ./src ./lightgbm-python
     cp -R ./swig ./lightgbm-python
-    cp -R ./windows ./lightgbm-python
 
     # include only specific files from external_libs, to keep the package
     # small and avoid redistributing code with licenses incompatible with
@@ -303,8 +302,7 @@ if test "${INSTALL}" = true; then
         ./external_libs \
         ./include \
         ./src \
-        ./swig \
-        ./windows
+        ./swig
     # use regular-old setuptools for these builds, to avoid
     # trying to recompile the shared library
     sed -i.bak -e '/start:build-system/,/end:build-system/d' pyproject.toml

From 9b351e6ead39c4274ee7a9a6f3c1acf6a77bd2ce Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Tue, 29 Oct 2024 12:43:12 +0300
Subject: [PATCH 03/27] [ci] Fix version matching between RTD pages and R-package pages (#6673)

* Update script.js

* Update script.js

* Update script.js

* Update script.js

* replace url at build time

* manipulate with raw files
---
 docs/_static/js/script.js | 17 ++---------------
 docs/conf.py              | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js
index 107a6a4969a3..3cfc90de887d 100644
--- a/docs/_static/js/script.js
+++ b/docs/_static/js/script.js
@@ -4,25 +4,12 @@ $(function() {
     /* List each class property item on a new line
        https://github.com/microsoft/LightGBM/issues/5073 */
-    if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') != -1) {
+    if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') !== -1) {
         $('.py.property').each(function() {
             this.style.setProperty('display', 'inline', 'important');
         });
     }
 
-    /* Point to the same version of R API as the current docs version */
-    var current_version_elems = $('.rst-current-version');
-    if(current_version_elems.length !== 0) {
-        var current_version = $(current_version_elems[0]).contents().filter(function() {
-            return this.nodeType == 3;
-        }).text().trim().split(' ').pop();
-        if(current_version !== 'latest') {
-            $('a.reference.external[href$="/latest/R/reference/"]').each(function() {
-                $(this).attr('href', function (_, val) { return val.replace('/latest/', '/' + current_version + '/'); });
-            });
-        }
-    }
-
-    /* Collapse specified sections in the installation guide */
-    if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') != -1) {
+    /* Collapse specified sections in the installation guide */
+    if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') !== -1) {
         $('').appendTo('body');
         var collapsable = [
             '#build-threadless-version-not-recommended',
diff --git a/docs/conf.py b/docs/conf.py
index f8bd29a69922..256787bf7f8d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,6 +39,7 @@
 sys.path.insert(0, str(LIB_PATH))
 
 INTERNAL_REF_REGEX = compile(r"(?P<begin>\.\/.+)(?P<extension>\.rst)(?P<end>$|#)")
+RTD_R_REF_REGEX = compile(r"(?P<begin>https://.+/)(?P<version>latest)(?P<end>/R/reference/)")
 
 
 class InternalRefTransform(Transform):
@@ -69,6 +70,7 @@ def run(self) -> List:
 os.environ["LIGHTGBM_BUILD_DOC"] = "1"
 C_API = os.environ.get("C_API", "").lower().strip() != "no"
 RTD = bool(os.environ.get("READTHEDOCS", ""))
+RTD_VERSION = os.environ.get("READTHEDOCS_VERSION", "stable")
 
 # If your documentation needs a minimal Sphinx version, state it here.
 needs_sphinx = "2.1.0"  # Due to sphinx.ext.napoleon, autodoc_typehints
@@ -309,6 +311,22 @@ def generate_r_docs(app: Sphinx) -> None:
         raise Exception(f"An error has occurred while generating documentation for R-package\n{e}")
 
 
+def replace_reference_to_r_docs(app: Sphinx) -> None:
+    """Make reference to R-package documentation point to the actual version.
+
+    Parameters
+    ----------
+    app : sphinx.application.Sphinx
+        The application object representing the Sphinx process.
+    """
+    index_doc_path = CURR_PATH / "index.rst"
+    with open(index_doc_path, "r+t", encoding="utf-8") as index_doc:
+        content = index_doc.read()
+        content = RTD_R_REF_REGEX.sub(rf"\g<begin>{RTD_VERSION}\g<end>", content)
+        index_doc.seek(0)
+        index_doc.write(content)
+
+
 def setup(app: Sphinx) -> None:
     """Add new elements at Sphinx initialization time.
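# A minimal sketch of the substitution performed by replace_reference_to_r_docs()
# above, assuming the regex group names shown there (begin/version/end are
# reconstructions; the original markup was stripped) and a hypothetical
# RTD_VERSION of "v4.5.0"; the URL is only an example of an R-package reference link:
from re import compile

RTD_R_REF_REGEX = compile(r"(?P<begin>https://.+/)(?P<version>latest)(?P<end>/R/reference/)")
print(RTD_R_REF_REGEX.sub(r"\g<begin>v4.5.0\g<end>", "https://lightgbm.readthedocs.io/en/latest/R/reference/"))
# prints: https://lightgbm.readthedocs.io/en/v4.5.0/R/reference/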
@@ -330,6 +348,7 @@ def setup(app: Sphinx) -> None: app.connect( "build-finished", lambda app, _: copytree(CURR_PATH.parent / "lightgbm_r" / "docs", Path(app.outdir) / "R") ) + app.connect("builder-inited", replace_reference_to_r_docs) app.add_transform(InternalRefTransform) add_js_file = getattr(app, "add_js_file", False) or app.add_javascript add_js_file("js/script.js") From 4a60a53c38da1356effa65b4d7bee720167d0fc5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Oct 2024 05:47:25 -0500 Subject: [PATCH 04/27] [ci] [R-package] remove code for 'rchk' checks (#6545) * [ci] [R-package] re-enable 'rchk' checks * comment out most CI * it would be helpful to actually enable rchk * remove all Rf_unprotect() calls to try to trigger failure * try removing some Rf_protect() calls * revert CI changes * just remove rchk * revert whitespace changes --- .ci/setup.sh | 2 +- .ci/test-r-package.sh | 29 +---------------------------- .ci/test.sh | 2 +- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/.ci/setup.sh b/.ci/setup.sh index e551b1683aef..30d564b2d5f4 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -142,7 +142,7 @@ else # Linux fi fi -if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then +if [[ "${TASK}" != "r-package" ]]; then if [[ $SETUP_CONDA != "false" ]]; then curl \ -sL \ diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index 7d821676bb71..ae205213d787 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -125,12 +125,7 @@ Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/A # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) -# NOTE: testthat is not required when running rchk -if [[ "${TASK}" == "r-rchk" ]]; then - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl')" -else - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" -fi +packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then packages+=", type = 'binary'" @@ -166,28 +161,6 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then ./build-cran-package.sh || exit 1 - if [[ "${TASK}" == "r-rchk" ]]; then - echo "Checking R-package with rchk" - mkdir -p packages - cp "${PKG_TARBALL}" packages - RCHK_LOG_FILE="rchk-logs.txt" - docker run \ - -v "$(pwd)/packages:/rchk/packages" \ - kalibera/rchk:latest \ - "/rchk/packages/${PKG_TARBALL}" \ - > "${RCHK_LOG_FILE}" 2>&1 \ - || (cat ${RCHK_LOG_FILE} && exit 1) - cat ${RCHK_LOG_FILE} - - # the exceptions below are from R itself and not LightGBM: - # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 - exit "$( - grep "${RCHK_LOG_FILE}" -v "in function strptime_internal" \ - | grep -v "in function RunGenCollect" \ - | grep --count -E '\[PB\]|ERROR' - )" - fi - # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM # git repo around. 
This is to protect against the use of relative paths diff --git a/.ci/test.sh b/.ci/test.sh index 4bf44140dbfd..2fc7820a643d 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -42,7 +42,7 @@ else export MACOSX_DEPLOYMENT_TARGET=12.0 fi -if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then +if [[ "${TASK}" == "r-package" ]]; then bash "${BUILD_DIRECTORY}/.ci/test-r-package.sh" || exit 1 exit 0 fi From dc0ed538aa09b755ba4a293dc6f344da51674260 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 30 Oct 2024 01:49:43 +0300 Subject: [PATCH 05/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 1) (#6704) * introdure PSScriptAnalyzer * revert workflow * run PSScriptAnalyzer before conda installation --- .ci/lint-powershell.ps1 | 56 ++++++++++++++++++++ .ci/lint-r-code.R | 1 - .ci/test-r-package-windows.ps1 | 97 ++++++++++++++++++---------------- .ci/test-windows.ps1 | 60 ++++++++++----------- .ci/test.sh | 3 ++ .editorconfig | 2 +- 6 files changed, 140 insertions(+), 79 deletions(-) create mode 100644 .ci/lint-powershell.ps1 diff --git a/.ci/lint-powershell.ps1 b/.ci/lint-powershell.ps1 new file mode 100644 index 000000000000..b2e045917ab6 --- /dev/null +++ b/.ci/lint-powershell.ps1 @@ -0,0 +1,56 @@ +$settings = @{ + Severity = @( + 'Information', + 'Warning', + 'Error' + ) + IncludeDefaultRules = $true + # Additional rules that are disabled by default + Rules = @{ + PSAvoidExclaimOperator = @{ + Enable = $true + } + PSAvoidLongLines = @{ + Enable = $true + MaximumLineLength = 120 + } + PSAvoidSemicolonsAsLineTerminators = @{ + Enable = $true + } + PSPlaceCloseBrace = @{ + Enable = $true + NoEmptyLineBefore = $true + IgnoreOneLineBlock = $true + NewLineAfter = $false + } + PSPlaceOpenBrace = @{ + Enable = $true + OnSameLine = $true + NewLineAfter = $true + IgnoreOneLineBlock = $true + } + PSUseConsistentIndentation = @{ + Enable = $true + IndentationSize = 4 + PipelineIndentation = 'IncreaseIndentationAfterEveryPipeline' + Kind = 'space' + } + PSUseConsistentWhitespace = @{ + Enable = $true + CheckInnerBrace = $true + CheckOpenBrace = $true + CheckOpenParen = $true + CheckOperator = $true + CheckSeparator = $true + CheckPipe = $true + CheckPipeForRedundantWhitespace = $true + CheckParameter = $true + IgnoreAssignmentOperatorInsideHashTable = $false + } + PSUseCorrectCasing = @{ + Enable = $true + } + } +} + +Invoke-ScriptAnalyzer -Path "$env:BUILD_DIRECTORY/.ci" -Recurse -EnableExit -Settings $settings diff --git a/.ci/lint-r-code.R b/.ci/lint-r-code.R index 8de09c0ff1ac..9eae00aa5d49 100755 --- a/.ci/lint-r-code.R +++ b/.ci/lint-r-code.R @@ -1,4 +1,3 @@ - loadNamespace("lintr") args <- commandArgs( diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 269695c51462..57055db1a69f 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -1,16 +1,16 @@ # Download a file and retry upon failure. 
This looks like # an infinite loop but CI-level timeouts will kill it -function Download-File-With-Retries { - param( - [string]$url, - [string]$destfile - ) - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - do { - Write-Output "Downloading ${url}" - sleep 5; - Invoke-WebRequest -Uri $url -OutFile $destfile - } while(!$?); +function Get-File-With-Tenacity { + param( + [Parameter(Mandatory = $true)][string]$url, + [Parameter(Mandatory = $true)][string]$destfile + ) + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + do { + Write-Output "Downloading ${url}" + sleep 5 + Invoke-WebRequest -Uri $url -OutFile $destfile + } while (-not $?) } # External utilities like R.exe / Rscript.exe writing to stderr (even for harmless @@ -20,20 +20,23 @@ function Download-File-With-Retries { # Using standard PowerShell redirection does not work to avoid these errors. # This function uses R's built-in redirection mechanism, sink(). Any place where # this function is used is a command that writes harmless messages to stderr -function Run-R-Code-Redirect-Stderr { - param( - [string]$rcode - ) - $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" - Rscript --vanilla -e $decorated_code +function Invoke-R-Code-Redirect-Stderr { + param( + [Parameter(Mandatory = $true)][string]$rcode + ) + $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" + Rscript --vanilla -e $decorated_code } # Remove all items matching some pattern from PATH environment variable function Remove-From-Path { - param( - [string]$pattern_to_remove - ) - $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + [CmdletBinding(SupportsShouldProcess)] + param( + [Parameter(Mandatory = $true)][string]$pattern_to_remove + ) + if ($PSCmdlet.ShouldProcess($env:PATH, "Removing ${pattern_to_remove}")) { + $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + } } # remove some details that exist in the GitHub Actions images which might @@ -87,7 +90,7 @@ if ($env:R_MAJOR_VERSION -eq "3") { $env:R_WINDOWS_VERSION = "4.3.1" } else { Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" - Check-Output $false + Assert-Output $false } $env:CMAKE_VERSION = "3.30.0" @@ -120,29 +123,29 @@ tzutil /s "GMT Standard Time" # download R, RTools and CMake Write-Output "Downloading R, Rtools and CMake" -Download-File-With-Retries -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" -Download-File-With-Retries -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" -Download-File-With-Retries -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" +Get-File-With-Tenacity -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" +Get-File-With-Tenacity -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" +Get-File-With-Tenacity -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" # Install R Write-Output 
"Installing R" -Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Check-Output $? +Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Assert-Output $? Write-Output "Done installing R" Write-Output "Installing Rtools" -Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Check-Output $? +Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Assert-Output $? Write-Output "Done installing Rtools" Write-Output "Installing CMake" Add-Type -AssemblyName System.IO.Compression.FileSystem -[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Check-Output $? +[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $? # Remove old CMake shiped with RTools Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" Write-Output "Installing dependencies" $packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" -Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $? +Invoke-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Assert-Output $? Write-Output "Building R-package" @@ -163,9 +166,9 @@ if ($env:COMPILER -ne "MSVC") { $env:BUILD_R_FLAGS = "'--skip-install'" } else { Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" - Check-Output $false + Assert-Output $false } - Run-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Check-Output $? + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? } elseif ($env:R_BUILD_TYPE -eq "cran") { # NOTE: gzip and tar are needed to create a CRAN package on Windows, but # some flavors of tar.exe can fail in some settings on Windows. @@ -174,7 +177,7 @@ if ($env:COMPILER -ne "MSVC") { if ($env:R_MAJOR_VERSION -eq "3") { $env:PATH = "C:\msys64\usr\bin;" + $env:PATH } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Check-Output $? + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? Remove-From-Path ".*msys64.*" # Test CRAN source .tar.gz in a directory that is not this repo or below it. 
# When people install.packages('lightgbm'), they won't have the LightGBM @@ -193,31 +196,31 @@ if ($env:COMPILER -ne "MSVC") { } else { $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? Write-Output "R CMD check build logs:" $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Check-Output $check_succeeded + Assert-Output $check_succeeded Write-Output "Looking for issues with R CMD check results" if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" - Check-Output $False + Assert-Output $False } } else { $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" - Run-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? Write-Output "----- build and install logs -----" Get-Content -Path "$INSTALL_LOG_FILE_NAME" Write-Output "----- end of build and install logs -----" - Check-Output $install_succeeded + Assert-Output $install_succeeded # some errors are not raised above, but can be found in the logs if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { echo "ERRORs have been found installing lightgbm" - Check-Output $False + Assert-Output $False } } @@ -231,7 +234,7 @@ if ($env:TOOLCHAIN -ne "MSVC") { } if ($checks_cnt -eq 0) { Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that we actually got the expected compiler. The R-package has some logic @@ -241,7 +244,7 @@ if ($env:R_BUILD_TYPE -eq "cmake") { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" if ($checks.Matches.length -eq 0) { Write-Output "The wrong compiler was used. Check the build logs." - Check-Output $False + Assert-Output $False } } @@ -251,7 +254,7 @@ if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" if ($checks.Matches.length -eq 0) { Write-Output "The wrong toolchain was used. Check the build logs." - Check-Output $False + Assert-Output $False } } @@ -267,7 +270,7 @@ if ($env:R_BUILD_TYPE -eq "cran") { } if ($checks_cnt -eq 0) { Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that MM_MALLOC preprocessor definition is actually used in CI builds. @@ -282,7 +285,7 @@ if ($env:R_BUILD_TYPE -eq "cran") { } if ($checks_cnt -eq 0) { Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that OpenMP is actually used in CMake builds. 
@@ -290,17 +293,17 @@ if ($env:R_BUILD_TYPE -eq "cmake") { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" if ($checks.Matches.length -eq 0) { Write-Output "OpenMP wasn't found. Check the build logs." - Check-Output $False + Assert-Output $False } } if ($env:COMPILER -eq "MSVC") { Write-Output "Running tests with testthat.R" cd R-package/tests - # NOTE: using Rscript.exe intentionally here, instead of Run-R-Code-Redirect-Stderr, - # because something about the interaction between Run-R-Code-Redirect-Stderr + # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, + # because something about the interaction between Invoke-R-Code-Redirect-Stderr # and testthat results in failing tests not exiting with a non-0 exit code. - Rscript.exe --vanilla "testthat.R" ; Check-Output $? + Rscript.exe --vanilla "testthat.R" ; Assert-Output $? } Write-Output "No issues were found checking the R-package" diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index a2c498531262..87c214856212 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -1,9 +1,9 @@ -function Check-Output { - param( [bool]$success ) - if (!$success) { - $host.SetShouldExit(-1) - exit 1 - } +function Assert-Output { + param( [Parameter(Mandatory = $true)][bool]$success ) + if (-not $success) { + $host.SetShouldExit(-1) + exit 1 + } } $env:CONDA_ENV = "test-env" @@ -17,14 +17,14 @@ Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore [Void][System.IO.Directory]::CreateDirectory($env:TMPDIR) if ($env:TASK -eq "r-package") { - & .\.ci\test-r-package-windows.ps1 ; Check-Output $? + & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? Exit 0 } if ($env:TASK -eq "cpp-tests") { cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 - cmake --build build --target testlightgbm --config Debug ; Check-Output $? - .\Debug\testlightgbm.exe ; Check-Output $? + cmake --build build --target testlightgbm --config Debug ; Assert-Output $? + .\Debug\testlightgbm.exe ; Assert-Output $? Exit 0 } @@ -33,23 +33,23 @@ if ($env:TASK -eq "swig") { $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Check-Output $? + [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? Write-Output "CMake build logs:" Get-Content -Path "$BuildLogFileName" - Check-Output $build_succeeded + Assert-Output $build_succeeded $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" $checks_cnt = $checks.Matches.length if ($checks_cnt -eq 0) { Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." 
- Check-Output $False + Assert-Output $False } - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? if ($env:AZURE -eq "true") { - cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $? + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? } Exit 0 } @@ -72,7 +72,7 @@ conda create ` -y ` -n $env:CONDA_ENV ` --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Check-Output $? + "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? if ($env:TASK -ne "bdist") { conda activate $env:CONDA_ENV @@ -80,37 +80,37 @@ if ($env:TASK -ne "bdist") { cd $env:BUILD_SOURCESDIRECTORY if ($env:TASK -eq "regular") { - cmake -B build -S . -A x64 ; Check-Output $? - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? - sh ./build-python.sh install --precompile ; Check-Output $? + cmake -B build -S . -A x64 ; Assert-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + sh ./build-python.sh install --precompile ; Assert-Output $? cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif ($env:TASK -eq "sdist") { - sh ./build-python.sh sdist ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $? + sh ./build-python.sh sdist ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? } elseif ($env:TASK -eq "bdist") { # Import the Chocolatey profile module so that the RefreshEnv command # invoked below properly updates the current PowerShell session environment. $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Import-Module "$module" ; Check-Output $? + Import-Module "$module" ; Assert-Output $? RefreshEnv Write-Output "Current OpenCL drivers:" Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors conda activate $env:CONDA_ENV - sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? + sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { if ($env:COMPILER -eq "MINGW") { - sh ./build-python.sh install --mingw ; Check-Output $? + sh ./build-python.sh install --mingw ; Assert-Output $? } else { - sh ./build-python.sh install; Check-Output $? + sh ./build-python.sh install; Assert-Output $? } } @@ -125,7 +125,7 @@ if ($env:TASK -eq "bdist") { $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" } -pytest $tests ; Check-Output $? +pytest $tests ; Assert-Output $? 
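# The helper renames running through this patch and patch 07 (Check-Output ->
# Assert-Output, Download-File-With-Retries -> Get-File-With-Tenacity,
# Run-R-Code-Redirect-Stderr -> Invoke-R-Code-Redirect-Stderr) line up with
# PowerShell's approved-verb convention, which PSScriptAnalyzer checks via its
# PSUseApprovedVerbs rule. A small sketch, assuming only the built-in Get-Verb
# cmdlet (the list of verbs queried below is an illustrative choice):
Get-Verb | Where-Object { $_.Verb -in @('Assert', 'Get', 'Invoke', 'Check') } | Select-Object Verb, Group
# 'Check' never appears in the output because it is not an approved verb, which
# is why functions named Check-* would be flagged by the linter.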
if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide @@ -134,9 +134,9 @@ if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -e conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" foreach ($file in @(Get-ChildItem *.py)) { @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file ; Check-Output $? + python $file ; Assert-Output $? } # run all examples cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Check-Output $? # run all notebooks + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks } diff --git a/.ci/test.sh b/.ci/test.sh index 2fc7820a643d..9b3e1ee3938d 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -98,6 +98,9 @@ if [[ $TASK == "swig" ]]; then fi if [[ $TASK == "lint" ]]; then + pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" + echo "Linting PowerShell code" + pwsh -file "./.ci/lint-powershell.ps1" || exit 0 conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'cmakelint>=1.4.3' \ diff --git a/.editorconfig b/.editorconfig index f7bd94f4f905..f4ae446b64bb 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,7 +7,7 @@ insert_final_newline = true indent_style = space indent_size = 2 -[*.{py,sh,js}] +[*.{py,sh,js,ps1}] indent_size = 4 line_length = 120 skip = external_libs From 8d5dca2e3a6181ba788f8aa3d7a69d08e9d0ea07 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 31 Oct 2024 08:48:47 -0500 Subject: [PATCH 06/27] [python-package] remove support for passing 'feature_name' and 'categorical_feature' through train() and cv() (#6706) --- python-package/lightgbm/engine.py | 69 +----------------------- tests/python_package_test/test_engine.py | 51 +++++++++++------- 2 files changed, 33 insertions(+), 87 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 89910599b0ca..dca6b607cdc7 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -3,7 +3,6 @@ import copy import json -import warnings from collections import OrderedDict, defaultdict from operator import attrgetter from pathlib import Path @@ -15,17 +14,14 @@ from .basic import ( Booster, Dataset, - LGBMDeprecationWarning, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _LGBM_BoosterEvalMethodResultType, _LGBM_BoosterEvalMethodResultWithStandardDeviationType, - _LGBM_CategoricalFeatureConfiguration, _LGBM_CustomObjectiveFunction, _LGBM_EvalFunctionResultType, - _LGBM_FeatureNameConfiguration, _log_warning, ) from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold @@ -54,15 +50,6 @@ ] -def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None: - msg = ( - f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in " - f"a future release. 
Set '{argname}' when calling lightgbm.Dataset() instead. " - "See https://github.com/microsoft/LightGBM/issues/6435." - ) - warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2) - - def _choose_num_iterations(num_boost_round_kwarg: int, params: Dict[str, Any]) -> Dict[str, Any]: """Choose number of boosting rounds. @@ -127,8 +114,6 @@ def train( valid_names: Optional[List[str]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", keep_training_booster: bool = False, callbacks: Optional[List[Callable]] = None, ) -> Booster: @@ -170,21 +155,6 @@ def train( set the ``metric`` parameter to the string ``"None"`` in ``params``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. keep_training_booster : bool, optional (default=False) Whether the returned Booster will be used to keep training. If False, the returned value will be converted into _InnerPredictor before returning. @@ -233,13 +203,6 @@ def train( f"Item {i} has type '{type(valid_item).__name__}'." 
) - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("train", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("train", "feature_name") - # create predictor first params = copy.deepcopy(params) params = _choose_param_value( @@ -278,9 +241,7 @@ def train( else: init_iteration = 0 - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) is_valid_contain_train = False train_data_name = "training" @@ -642,8 +603,6 @@ def cv( metrics: Optional[Union[str, List[str]]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", fpreproc: Optional[_LGBM_PreprocFunction] = None, seed: int = 0, callbacks: Optional[List[Callable]] = None, @@ -699,21 +658,6 @@ def cv( set ``metrics`` to the string ``"None"``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. fpreproc : callable or None, optional (default=None) Preprocessing function that takes (dtrain, dtest, params) and returns transformed versions of those. 
@@ -767,13 +711,6 @@ def cv( if not isinstance(train_set, Dataset): raise TypeError(f"cv() only accepts Dataset object, train_set has type '{type(train_set).__name__}'.") - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("cv", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("cv", "feature_name") - params = copy.deepcopy(params) params = _choose_param_value( main_param_name="objective", @@ -818,9 +755,7 @@ def cv( params.pop(metric_alias, None) params["metric"] = metrics - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) results = defaultdict(list) cvfolds = _make_n_folds( diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 286f066a3526..9ae471e7f4b9 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1459,7 +1459,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): ] ) y = rng.uniform(size=(100,)) - ds = lgb.Dataset(X, y) + ds = lgb.Dataset(X, y, categorical_feature=[1, 2]) params = { "bagging_fraction": 0.8, "bagging_freq": 2, @@ -1474,7 +1474,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): "verbosity": 0, } model_file = tmp_path / "model.txt" - orig_bst = lgb.train(params, ds, num_boost_round=1, categorical_feature=[1, 2]) + orig_bst = lgb.train(params, ds, num_boost_round=1) orig_bst.save_model(model_file) with model_file.open("rt") as f: model_contents = f.readlines() @@ -1746,16 +1746,18 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): gbm0 = lgb.train(params, lgb_train, num_boost_round=10) pred0 = gbm0.predict(X_test) assert lgb_train.categorical_feature == "auto" - lgb_train = lgb.Dataset(X, pd.DataFrame(y)) # also test that label can be one-column pd.DataFrame - gbm1 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[0]) + lgb_train = lgb.Dataset( + X, pd.DataFrame(y), categorical_feature=[0] + ) # also test that label can be one-column pd.DataFrame + gbm1 = lgb.train(params, lgb_train, num_boost_round=10) pred1 = gbm1.predict(X_test) assert lgb_train.categorical_feature == [0] - lgb_train = lgb.Dataset(X, pd.Series(y)) # also test that label can be pd.Series - gbm2 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A"]) + lgb_train = lgb.Dataset(X, pd.Series(y), categorical_feature=["A"]) # also test that label can be pd.Series + gbm2 = lgb.train(params, lgb_train, num_boost_round=10) pred2 = gbm2.predict(X_test) assert lgb_train.categorical_feature == ["A"] - lgb_train = lgb.Dataset(X, y) - gbm3 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D"]) + lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D"]) + gbm3 = lgb.train(params, lgb_train, num_boost_round=10) pred3 = gbm3.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D"] categorical_model_path = tmp_path / "categorical.model" @@ -1767,12 +1769,12 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): pred5 = gbm4.predict(X_test) gbm5 = lgb.Booster(model_str=model_str) pred6 = gbm5.predict(X_test) - lgb_train = lgb.Dataset(X, y) - gbm6 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D", "E"]) + 
lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D", "E"]) + gbm6 = lgb.train(params, lgb_train, num_boost_round=10) pred7 = gbm6.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D", "E"] - lgb_train = lgb.Dataset(X, y) - gbm7 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[]) + lgb_train = lgb.Dataset(X, y, categorical_feature=[]) + gbm7 = lgb.train(params, lgb_train, num_boost_round=10) pred8 = gbm7.predict(X_test) assert lgb_train.categorical_feature == [] with pytest.raises(AssertionError): @@ -3672,12 +3674,11 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test with a categorical feature x[:250, 0] = 0 y[:250] += 10 - lgb_train = lgb.Dataset(x, label=y) + lgb_train = lgb.Dataset(x, label=y, categorical_feature=[0]) est = lgb.train( dict(params, linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train, num_boost_round=10, - categorical_feature=[0], ) # test refit: same results on same data est2 = est.refit(x, label=y) @@ -3700,10 +3701,20 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test when num_leaves - 1 < num_features and when num_leaves - 1 > num_features X_train, _, y_train, _ = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) params = {"linear_tree": True, "verbose": -1, "metric": "mse", "seed": 0} - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=2)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=60)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=2), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=60), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) def test_save_and_load_linear(tmp_path): @@ -3714,8 +3725,8 @@ def test_save_and_load_linear(tmp_path): X_train[: X_train.shape[0] // 2, 0] = 0 y_train[: X_train.shape[0] // 2] = 1 params = {"linear_tree": True} - train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) - est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params, categorical_feature=[0]) + est_1 = lgb.train(params, train_data_1, num_boost_round=10) pred_1 = est_1.predict(X_train) tmp_dataset = str(tmp_path / "temp_dataset.bin") From 92aa07b4b827d020d9aeeeddec0b3416042c9d2a Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 1 Nov 2024 07:09:16 +0300 Subject: [PATCH 07/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 2) (#6709) --- .ci/install-opencl.ps1 | 28 ++-- .ci/test-r-package-windows.ps1 | 242 ++++++++++++++++----------------- .ci/test-windows.ps1 | 156 ++++++++++----------- .ci/test.sh | 2 +- 4 files changed, 214 insertions(+), 214 deletions(-) diff --git a/.ci/install-opencl.ps1 b/.ci/install-opencl.ps1 index 7e335fe13aa4..e48f24e4bf05 100644 --- a/.ci/install-opencl.ps1 +++ b/.ci/install-opencl.ps1 @@ -7,12 +7,12 @@ $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows dow Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" if (Test-Path "$installer") { - Write-Output "Successfully downloaded 
OpenCL platform installer" + Write-Output "Successfully downloaded OpenCL platform installer" } else { - Write-Output "Unable to download OpenCL platform installer" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 + Write-Output "Unable to download OpenCL platform installer" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } # Install OpenCL platform from installer executable @@ -21,14 +21,14 @@ Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/q $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors if ($property -eq $null) { - Write-Output "Unable to install OpenCL CPU platform" - Write-Output "OpenCL installation log:" - Get-Content "opencl.log" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 + Write-Output "Unable to install OpenCL CPU platform" + Write-Output "OpenCL installation log:" + Get-Content "opencl.log" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } else { - Write-Output "Successfully installed OpenCL CPU platform" - Write-Output "Current OpenCL drivers:" - Write-Output $property + Write-Output "Successfully installed OpenCL CPU platform" + Write-Output "Current OpenCL drivers:" + Write-Output $property } diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 57055db1a69f..1dff55c2a9aa 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -75,22 +75,22 @@ Remove-Item C:\rtools43 -Force -Recurse -ErrorAction Ignore # * some paths and file names are different on R4.0 $env:R_MAJOR_VERSION = $env:R_VERSION.split('.')[0] if ($env:R_MAJOR_VERSION -eq "3") { - # Rtools 3.x has to be installed at C:\Rtools\ - # * https://stackoverflow.com/a/46619260/3986677 - $RTOOLS_INSTALL_PATH = "C:\Rtools" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" - $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" - $env:R_WINDOWS_VERSION = "3.6.3" + # Rtools 3.x has to be installed at C:\Rtools\ + # * https://stackoverflow.com/a/46619260/3986677 + $RTOOLS_INSTALL_PATH = "C:\Rtools" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" + $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" + $env:R_WINDOWS_VERSION = "3.6.3" } elseif ($env:R_MAJOR_VERSION -eq "4") { - $RTOOLS_INSTALL_PATH = "C:\rtools43" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" - $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" - $env:R_WINDOWS_VERSION = "4.3.1" + $RTOOLS_INSTALL_PATH = "C:\rtools43" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" + $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" + $env:R_WINDOWS_VERSION = "4.3.1" } else { - Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" - Assert-Output $false + Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" + Assert-Output $false } $env:CMAKE_VERSION = "3.30.0" @@ -99,9 +99,9 @@ $env:R_LIBS = "$env:R_LIB_PATH" $env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation" $env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH if ([version]$env:R_VERSION -lt [version]"4.0") { - $env:CRAN_MIRROR = "https://cran-archive.r-project.org" + $env:CRAN_MIRROR = 
"https://cran-archive.r-project.org" } else { - $env:CRAN_MIRROR = "https://cran.rstudio.com" + $env:CRAN_MIRROR = "https://cran.rstudio.com" } $env:MIKTEX_EXCEPTION_PATH = "$env:TEMP\miktex" @@ -112,8 +112,8 @@ if ($env:R_BUILD_TYPE -ne "cran") { } if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" - $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" + $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" + $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" } cd $env:BUILD_SOURCESDIRECTORY @@ -152,158 +152,158 @@ Write-Output "Building R-package" # R CMD check is not used for MSVC builds if ($env:COMPILER -ne "MSVC") { - $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" - $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" + $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" - if ($env:R_BUILD_TYPE -eq "cmake") { - if ($env:TOOLCHAIN -eq "MINGW") { - Write-Output "Telling R to use MinGW" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSYS") { - Write-Output "Telling R to use MSYS" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSVC") { - $env:BUILD_R_FLAGS = "'--skip-install'" - } else { - Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" - Assert-Output $false - } - Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? - } elseif ($env:R_BUILD_TYPE -eq "cran") { - # NOTE: gzip and tar are needed to create a CRAN package on Windows, but - # some flavors of tar.exe can fail in some settings on Windows. - # Putting the msys64 utilities at the beginning of PATH temporarily to be - # sure they're used for that purpose. - if ($env:R_MAJOR_VERSION -eq "3") { - $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + if ($env:R_BUILD_TYPE -eq "cmake") { + if ($env:TOOLCHAIN -eq "MINGW") { + Write-Output "Telling R to use MinGW" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSYS") { + Write-Output "Telling R to use MSYS" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSVC") { + $env:BUILD_R_FLAGS = "'--skip-install'" + } else { + Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" + Assert-Output $false + } + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? + } elseif ($env:R_BUILD_TYPE -eq "cran") { + # NOTE: gzip and tar are needed to create a CRAN package on Windows, but + # some flavors of tar.exe can fail in some settings on Windows. + # Putting the msys64 utilities at the beginning of PATH temporarily to be + # sure they're used for that purpose. + if ($env:R_MAJOR_VERSION -eq "3") { + $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + } + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? + Remove-From-Path ".*msys64.*" + # Test CRAN source .tar.gz in a directory that is not this repo or below it. + # When people install.packages('lightgbm'), they won't have the LightGBM + # git repo around. 
This is to protect against the use of relative paths + # like ../../CMakeLists.txt that would only work if you are in the repoo + $R_CMD_CHECK_DIR = "tmp-r-cmd-check" + New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null + Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null + cd "C:\$R_CMD_CHECK_DIR\" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? - Remove-From-Path ".*msys64.*" - # Test CRAN source .tar.gz in a directory that is not this repo or below it. - # When people install.packages('lightgbm'), they won't have the LightGBM - # git repo around. This is to protect against the use of relative paths - # like ../../CMakeLists.txt that would only work if you are in the repoo - $R_CMD_CHECK_DIR = "tmp-r-cmd-check" - New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null - Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null - cd "C:\$R_CMD_CHECK_DIR\" - } - Write-Output "Running R CMD check" - if ($env:R_BUILD_TYPE -eq "cran") { - # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) - $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } else { - $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + Write-Output "Running R CMD check" + if ($env:R_BUILD_TYPE -eq "cran") { + # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) + $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" + } else { + $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" + } + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? - Write-Output "R CMD check build logs:" - $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "R CMD check build logs:" + $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Assert-Output $check_succeeded + Assert-Output $check_succeeded - Write-Output "Looking for issues with R CMD check results" - if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { - echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" - Assert-Output $False - } + Write-Output "Looking for issues with R CMD check results" + if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { + echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" + Assert-Output $False + } } else { - $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" - Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? 
- Write-Output "----- build and install logs -----" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Write-Output "----- end of build and install logs -----" - Assert-Output $install_succeeded - # some errors are not raised above, but can be found in the logs - if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { - echo "ERRORs have been found installing lightgbm" - Assert-Output $False - } + $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" + Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Write-Output "----- build and install logs -----" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "----- end of build and install logs -----" + Assert-Output $install_succeeded + # some errors are not raised above, but can be found in the logs + if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { + echo "ERRORs have been found installing lightgbm" + Assert-Output $False + } } # Checking that the correct R version was used if ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" + $checks_cnt = $checks.Matches.length } else { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + $checks_cnt = $checks.Matches.length } if ($checks_cnt -eq 0) { - Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." - Assert-Output $False + Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." + Assert-Output $False } # Checking that we actually got the expected compiler. The R-package has some logic # to fail back to MinGW if MSVC fails, but for CI builds we need to check that the correct # compiler was used. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong compiler was used. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong compiler was used. Check the build logs." + Assert-Output $False + } } # Checking that we got the right toolchain for MinGW. If using MinGW, both # MinGW and MSYS toolchains are supported if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong toolchain was used. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong toolchain was used. Check the build logs." + Assert-Output $False + } } # Checking that MM_PREFETCH preprocessor definition is actually used in CI builds. 
if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." - Assert-Output $False + Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that MM_MALLOC preprocessor definition is actually used in CI builds. if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." - Assert-Output $False + Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that OpenMP is actually used in CMake builds. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" - if ($checks.Matches.length -eq 0) { - Write-Output "OpenMP wasn't found. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" + if ($checks.Matches.length -eq 0) { + Write-Output "OpenMP wasn't found. Check the build logs." + Assert-Output $False + } } if ($env:COMPILER -eq "MSVC") { - Write-Output "Running tests with testthat.R" - cd R-package/tests - # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, - # because something about the interaction between Invoke-R-Code-Redirect-Stderr - # and testthat results in failing tests not exiting with a non-0 exit code. - Rscript.exe --vanilla "testthat.R" ; Assert-Output $? + Write-Output "Running tests with testthat.R" + cd R-package/tests + # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, + # because something about the interaction between Invoke-R-Code-Redirect-Stderr + # and testthat results in failing tests not exiting with a non-0 exit code. + Rscript.exe --vanilla "testthat.R" ; Assert-Output $? 
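All of the guards above share one log-grepping idiom: run a command, capture its log, then count Select-String matches for an expected marker. A minimal self-contained sketch of that idiom (the log file name and its contents below are invented for illustration and are not part of the CI scripts):

    # Write a tiny fake build log (illustrative content only).
    $DEMO_LOG_FILE_NAME = "demo-build.log"
    Set-Content -Path "$DEMO_LOG_FILE_NAME" -Value "-- Found OpenMP: TRUE (found version 4.5)"

    # Select-String returns one MatchInfo object per matching line; counting
    # .Matches reveals whether the expected marker ever appeared in the log.
    $checks = Select-String -Path "$DEMO_LOG_FILE_NAME" -Pattern "Found OpenMP: TRUE"
    if ($checks.Matches.length -eq 0) {
        Write-Output "Expected marker not found. Check the build logs."
        exit 1
    }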
} Write-Output "No issues were found checking the R-package" diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index 87c214856212..f3015ae7d180 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -17,41 +17,41 @@ Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore [Void][System.IO.Directory]::CreateDirectory($env:TMPDIR) if ($env:TASK -eq "r-package") { - & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? - Exit 0 + & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? + Exit 0 } if ($env:TASK -eq "cpp-tests") { - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 - cmake --build build --target testlightgbm --config Debug ; Assert-Output $? - .\Debug\testlightgbm.exe ; Assert-Output $? - Exit 0 + cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 + cmake --build build --target testlightgbm --config Debug ; Assert-Output $? + .\Debug\testlightgbm.exe ; Assert-Output $? + Exit 0 } if ($env:TASK -eq "swig") { - $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" - Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? - $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" - $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH - $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" - cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? - Write-Output "CMake build logs:" - Get-Content -Path "$BuildLogFileName" - Assert-Output $build_succeeded - $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" - $checks_cnt = $checks.Matches.length - if ($checks_cnt -eq 0) { - Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." - Assert-Output $False - } - cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? - if ($env:AZURE -eq "true") { - cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? - } - Exit 0 + $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" + Add-Type -AssemblyName System.IO.Compression.FileSystem + [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? + $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" + $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH + $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" + cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? 
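Two PowerShell details in the block above are easy to miss: `*>` redirects every output stream (success, error, warning, and so on) to a single destination, and `$?` reports the success of only the most recent command, so it has to be captured on the same line before anything else runs. A small standalone sketch, assuming only that cmake.exe is on PATH:

    # '*>' sends all streams of the command to one log file; '$?' is read
    # immediately afterwards, before any other command can overwrite it.
    cmake --version *> "demo_build.log" ; $build_succeeded = $?

    # Print the captured log first, then fail if needed, so the log is
    # always visible in CI output even for failed builds.
    Write-Output "captured log:"
    Get-Content -Path "demo_build.log"
    if (-not $build_succeeded) {
        exit 1
    }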
+ Write-Output "CMake build logs:" + Get-Content -Path "$BuildLogFileName" + Assert-Output $build_succeeded + $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" + $checks_cnt = $checks.Matches.length + if ($checks_cnt -eq 0) { + Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." + Assert-Output $False + } + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + if ($env:AZURE -eq "true") { + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? + } + Exit 0 } # setup for Python @@ -61,82 +61,82 @@ conda config --set always_yes yes --set changeps1 no conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]" if ($env:PYTHON_VERSION -eq "3.7") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" } elseif ($env:PYTHON_VERSION -eq "3.8") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" } else { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } conda create ` - -y ` - -n $env:CONDA_ENV ` - --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? + -y ` + -n $env:CONDA_ENV ` + --file $env:CONDA_REQUIREMENT_FILE ` + "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? if ($env:TASK -ne "bdist") { - conda activate $env:CONDA_ENV + conda activate $env:CONDA_ENV } cd $env:BUILD_SOURCESDIRECTORY if ($env:TASK -eq "regular") { - cmake -B build -S . -A x64 ; Assert-Output $? - cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? - sh ./build-python.sh install --precompile ; Assert-Output $? - cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY + cmake -B build -S . -A x64 ; Assert-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + sh ./build-python.sh install --precompile ; Assert-Output $? + cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY + cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif ($env:TASK -eq "sdist") { - sh ./build-python.sh sdist ; Assert-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? + sh ./build-python.sh sdist ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? } elseif ($env:TASK -eq "bdist") { - # Import the Chocolatey profile module so that the RefreshEnv command - # invoked below properly updates the current PowerShell session environment. - $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Import-Module "$module" ; Assert-Output $? - RefreshEnv + # Import the Chocolatey profile module so that the RefreshEnv command + # invoked below properly updates the current PowerShell session environment. + $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + Import-Module "$module" ; Assert-Output $? 
+ RefreshEnv - Write-Output "Current OpenCL drivers:" - Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors + Write-Output "Current OpenCL drivers:" + Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors - conda activate $env:CONDA_ENV - sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? - cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY + conda activate $env:CONDA_ENV + sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? + cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { - if ($env:COMPILER -eq "MINGW") { - sh ./build-python.sh install --mingw ; Assert-Output $? - } else { - sh ./build-python.sh install; Assert-Output $? - } + if ($env:COMPILER -eq "MINGW") { + sh ./build-python.sh install --mingw ; Assert-Output $? + } else { + sh ./build-python.sh install; Assert-Output $? + } } if (($env:TASK -eq "sdist") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - # cannot test C API with "sdist" task - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" + # cannot test C API with "sdist" task + $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" } else { - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" + $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" } if ($env:TASK -eq "bdist") { - # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py - $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" + # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py + $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" } pytest $tests ; Assert-Output $? if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide - @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode - conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" - foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file ; Assert-Output $? - } # run all examples - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks - (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? 
# run all notebooks + cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide + @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" + (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode + conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" + foreach ($file in @(Get-ChildItem *.py)) { + @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file + python $file ; Assert-Output $? + } # run all examples + cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks + (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks } diff --git a/.ci/test.sh b/.ci/test.sh index 9b3e1ee3938d..f959af16ccf3 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -100,7 +100,7 @@ fi if [[ $TASK == "lint" ]]; then pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" echo "Linting PowerShell code" - pwsh -file "./.ci/lint-powershell.ps1" || exit 0 + pwsh -file "./.ci/lint-powershell.ps1" || : conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'cmakelint>=1.4.3' \ From 13f2e92bb0ac64f94d9b5016a33b5c34d2134204 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 3 Nov 2024 19:32:25 +0300 Subject: [PATCH 08/27] [ci] check JavaScript code with `biome` tool (#6711) * lint js code * hotfix * Update .editorconfig Co-authored-by: James Lamb --------- Co-authored-by: James Lamb --- .ci/lint-js.sh | 5 ++ .ci/{lint-python.sh => lint-python-bash.sh} | 0 .ci/test.sh | 7 +- .editorconfig | 11 ++- biome.json | 21 ++++++ docs/_static/js/script.js | 69 +++++++++++-------- .../binary_classification/forced_splits.json | 2 +- examples/regression/forced_bins.json | 4 +- examples/regression/forced_bins2.json | 2 +- python-package/README.rst | 2 +- 10 files changed, 81 insertions(+), 42 deletions(-) create mode 100644 .ci/lint-js.sh rename .ci/{lint-python.sh => lint-python-bash.sh} (100%) create mode 100644 biome.json diff --git a/.ci/lint-js.sh b/.ci/lint-js.sh new file mode 100644 index 000000000000..534f251620e2 --- /dev/null +++ b/.ci/lint-js.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e -E -u -o pipefail + +biome ci --config-path=./biome.json --diagnostic-level=info --error-on-warnings ./ diff --git a/.ci/lint-python.sh b/.ci/lint-python-bash.sh similarity index 100% rename from .ci/lint-python.sh rename to .ci/lint-python-bash.sh diff --git a/.ci/test.sh b/.ci/test.sh index f959af16ccf3..45ee65629744 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -103,6 +103,7 @@ if [[ $TASK == "lint" ]]; then pwsh -file "./.ci/lint-powershell.ps1" || : conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ + 'biome>=1.9.3' \ 'cmakelint>=1.4.3' \ 'cpplint>=1.6.0' \ 'matplotlib-base>=3.9.1' \ @@ -113,12 +114,14 @@ if [[ $TASK == "lint" ]]; then 'r-lintr>=3.1.2' # shellcheck disable=SC1091 source activate "${CONDA_ENV}" - echo "Linting Python code" - bash ./.ci/lint-python.sh || exit 1 + echo "Linting Python and bash code" + bash 
./.ci/lint-python-bash.sh || exit 1 echo "Linting R code" Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1 echo "Linting C++ code" bash ./.ci/lint-cpp.sh || exit 1 + echo "Linting JavaScript code" + bash ./.ci/lint-js.sh || exit 1 exit 0 fi diff --git a/.editorconfig b/.editorconfig index f4ae446b64bb..e7191b63c1d3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,22 +1,19 @@ root = true [*] -charset=utf-8 +charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true +end_of_line = lf indent_style = space indent_size = 2 -[*.{py,sh,js,ps1}] +[*.{py,sh,ps1,js,json}] indent_size = 4 -line_length = 120 +max_line_length = 120 skip = external_libs known_first_party = lightgbm -# Placeholder files -[{*.gitkeep,__init__.py}] -insert_final_newline = none - # Tabs matter for Makefile and .gitmodules [{makefile*,Makefile*,*.mk,*.mak,*.makefile,*.Makefile,GNUmakefile,BSDmakefile,make.bat,Makevars*,*.gitmodules}] indent_style = tab diff --git a/biome.json b/biome.json new file mode 100644 index 000000000000..5029d037189e --- /dev/null +++ b/biome.json @@ -0,0 +1,21 @@ +{ + "files": { + "ignore": [".mypy_cache/"] + }, + "formatter": { + "enabled": true, + "useEditorconfig": true + }, + "organizeImports": { + "enabled": true + }, + "linter": { + "enabled": true, + "rules": { + "all": true + } + }, + "javascript": { + "globals": ["$"] + } +} diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 3cfc90de887d..3f129501e06f 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -1,56 +1,69 @@ -$(function() { +$(() => { /* Use wider container for the page content */ - $('.wy-nav-content').each(function() { this.style.setProperty('max-width', 'none', 'important'); }); + $(".wy-nav-content").each(function () { + this.style.setProperty("max-width", "none", "important"); + }); /* List each class property item on a new line https://github.com/microsoft/LightGBM/issues/5073 */ - if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') !== -1) { - $('.py.property').each(function() { this.style.setProperty('display', 'inline', 'important'); }); + if (window.location.pathname.toLocaleLowerCase().indexOf("pythonapi") !== -1) { + $(".py.property").each(function () { + this.style.setProperty("display", "inline", "important"); + }); } /* Collapse specified sections in the installation guide */ - if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') !== -1) { - $('').appendTo('body'); - var collapsable = [ - '#build-threadless-version-not-recommended', - '#build-mpi-version', - '#build-gpu-version', - '#build-cuda-version', - '#build-java-wrapper', - '#build-c-unit-tests' + if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { + $( + '', + ).appendTo("body"); + const collapsable = [ + "#build-threadless-version-not-recommended", + "#build-mpi-version", + "#build-gpu-version", + "#build-cuda-version", + "#build-java-wrapper", + "#build-c-unit-tests", ]; - $.each(collapsable, function(_, val) { - var header = val + ' > :header:first'; - var content = val + ' :not(:header:first)'; - $(header).addClass('closed'); + $.each(collapsable, (_, val) => { + const header = `${val} > :header:first`; + const content = `${val} :not(:header:first)`; + $(header).addClass("closed"); $(content).hide(); - $(header).click(function() { - $(header).toggleClass('closed opened'); + $(header).click(() => { + $(header).toggleClass("closed opened"); $(content).slideToggle(0); }); }); /* Uncollapse parent sections 
when nested section is specified in the URL or before navigate to it from navbar */ function uncollapse(section) { - section.parents().each((_, val) => { $(val).children('.closed').click(); }); + section.parents().each((_, val) => { + $(val).children(".closed").click(); + }); } uncollapse($(window.location.hash)); - $('.wy-menu.wy-menu-vertical li a.reference.internal').click(function() { - uncollapse($($(this).attr('href'))); + $(".wy-menu.wy-menu-vertical li a.reference.internal").click(function () { + uncollapse($($(this).attr("href"))); }); /* Modify src and href attrs of artifacts badge */ function modifyBadge(src, href) { - $('img[alt="download artifacts"]').each(function() { + $('img[alt="download artifacts"]').each(function () { this.src = src; this.parentNode.href = href; }); } /* Initialize artifacts badge */ - modifyBadge('./_static/images/artifacts-fetching.svg', '#'); + modifyBadge("./_static/images/artifacts-fetching.svg", "#"); /* Fetch latest buildId and construct artifacts badge */ - $.getJSON('https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7', function(data) { - modifyBadge('./_static/images/artifacts-download.svg', - 'https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/' + data['value'][0]['id'] + '/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip'); - }); + $.getJSON( + "https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7", + (data) => { + modifyBadge( + "./_static/images/artifacts-download.svg", + `https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/${data.value[0].id}/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip`, + ); + }, + ); } }); diff --git a/examples/binary_classification/forced_splits.json b/examples/binary_classification/forced_splits.json index 1ee410c9789e..b09391a87f49 100644 --- a/examples/binary_classification/forced_splits.json +++ b/examples/binary_classification/forced_splits.json @@ -1,6 +1,6 @@ { "feature": 25, - "threshold": 1.30, + "threshold": 1.3, "left": { "feature": 26, "threshold": 0.85 diff --git a/examples/regression/forced_bins.json b/examples/regression/forced_bins.json index 1ee0a49d727c..19722afbbb4b 100644 --- a/examples/regression/forced_bins.json +++ b/examples/regression/forced_bins.json @@ -1,10 +1,10 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.3, 0.35, 0.4 ] + "bin_upper_bound": [0.3, 0.35, 0.4] }, { "feature": 1, - "bin_upper_bound": [ -0.1, -0.15, -0.2 ] + "bin_upper_bound": [-0.1, -0.15, -0.2] } ] diff --git a/examples/regression/forced_bins2.json b/examples/regression/forced_bins2.json index f4dca0ccaf34..d6454f8a4ae9 100644 --- a/examples/regression/forced_bins2.json +++ b/examples/regression/forced_bins2.json @@ -1,6 +1,6 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.19, 0.39, 0.59, 0.79 ] + "bin_upper_bound": [0.19, 0.39, 0.59, 0.79] } ] diff --git a/python-package/README.rst b/python-package/README.rst index 0e007e5ee7ec..face6bba6b74 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -286,7 +286,7 @@ To check that a contribution to the package matches its style expectations, run .. code:: sh - bash .ci/lint-python.sh + bash .ci/lint-python-bash.sh .. 
|License| image:: https://img.shields.io/github/license/microsoft/lightgbm.svg :target: https://github.com/microsoft/LightGBM/blob/master/LICENSE From e0071911c8327df9a031ba7e61e9a2c6cff43d76 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 3 Nov 2024 22:12:20 +0300 Subject: [PATCH 09/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 3) (#6710) Co-authored-by: James Lamb --- .ci/install-opencl.ps1 | 12 +++-- .ci/lint-powershell.ps1 | 2 +- .ci/test-r-package-windows.ps1 | 94 ++++++++++++++++++++++++++-------- .ci/test-windows.ps1 | 77 ++++++++++++++++++---------- .ci/test.sh | 2 +- 5 files changed, 134 insertions(+), 53 deletions(-) diff --git a/.ci/install-opencl.ps1 b/.ci/install-opencl.ps1 index e48f24e4bf05..b69ed575f0fb 100644 --- a/.ci/install-opencl.ps1 +++ b/.ci/install-opencl.ps1 @@ -4,7 +4,11 @@ $installer = "AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe" Write-Output "Downloading OpenCL platform installer" $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed -Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +$params = @{ + OutFile = "$installer" + Uri = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +} +Invoke-WebRequest @params if (Test-Path "$installer") { Write-Output "Successfully downloaded OpenCL platform installer" @@ -17,10 +21,12 @@ if (Test-Path "$installer") { # Install OpenCL platform from installer executable Write-Output "Running OpenCL installer" -Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait } +Invoke-Command -ScriptBlock { + Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait +} $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors -if ($property -eq $null) { +if ($null -eq $property) { Write-Output "Unable to install OpenCL CPU platform" Write-Output "OpenCL installation log:" Get-Content "opencl.log" diff --git a/.ci/lint-powershell.ps1 b/.ci/lint-powershell.ps1 index b2e045917ab6..332a6e040319 100644 --- a/.ci/lint-powershell.ps1 +++ b/.ci/lint-powershell.ps1 @@ -53,4 +53,4 @@ $settings = @{ } } -Invoke-ScriptAnalyzer -Path "$env:BUILD_DIRECTORY/.ci" -Recurse -EnableExit -Settings $settings +Invoke-ScriptAnalyzer -Path ./ -Recurse -EnableExit -Settings $settings diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 1dff55c2a9aa..1ce698a49c72 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -97,7 +97,13 @@ $env:CMAKE_VERSION = "3.30.0" $env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/' $env:R_LIBS = "$env:R_LIB_PATH" $env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation" -$env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH +$env:PATH = @( + "$env:RTOOLS_BIN", + "$env:RTOOLS_MINGW_BIN", + "$env:R_LIB_PATH/R/bin/x64", + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin", + "$env:PATH" +) -join ";" if ([version]$env:R_VERSION -lt [version]"4.0") { $env:CRAN_MIRROR = "https://cran-archive.r-project.org" } else { @@ -116,24 +122,50 @@ if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location 
"$env:BUILD_SOURCESDIRECTORY" tzutil /s "GMT Standard Time" -[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH) -[Void][System.IO.Directory]::CreateDirectory($env:CMAKE_PATH) +[Void][System.IO.Directory]::CreateDirectory("$env:R_LIB_PATH") +[Void][System.IO.Directory]::CreateDirectory("$env:CMAKE_PATH") # download R, RTools and CMake Write-Output "Downloading R, Rtools and CMake" -Get-File-With-Tenacity -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" -Get-File-With-Tenacity -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" -Get-File-With-Tenacity -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" +$params = @{ + url = "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" + destfile = "R-win.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" + destfile = "Rtools.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/Kitware/CMake/releases/download/v{0}/cmake-{0}-windows-x86_64.zip" -f $env:CMAKE_VERSION + destfile = "$env:CMAKE_PATH/cmake.zip" +} +Get-File-With-Tenacity @params # Install R Write-Output "Installing R" -Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Assert-Output $? +$params = @{ + FilePath = "R-win.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" +} +Start-Process @params ; Assert-Output $? Write-Output "Done installing R" Write-Output "Installing Rtools" -Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Assert-Output $? +$params = @{ + FilePath = "Rtools.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" +} +Start-Process @params; Assert-Output $? Write-Output "Done installing Rtools" Write-Output "Installing CMake" @@ -144,8 +176,16 @@ Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" Write-Output "Installing dependencies" -$packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" -Invoke-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Assert-Output $? +$packages = -join @( + "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ", + "dependencies = c('Imports', 'Depends', 'LinkingTo')" +) +$params = -join @( + "options(install.packages.check.source = 'no'); ", + "install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', ", + "lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" +) +Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? 
Write-Output "Building R-package" @@ -168,16 +208,21 @@ if ($env:COMPILER -ne "MSVC") { Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" Assert-Output $false } - Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')" + Assert-Output $? } elseif ($env:R_BUILD_TYPE -eq "cran") { # NOTE: gzip and tar are needed to create a CRAN package on Windows, but # some flavors of tar.exe can fail in some settings on Windows. # Putting the msys64 utilities at the beginning of PATH temporarily to be # sure they're used for that purpose. if ($env:R_MAJOR_VERSION -eq "3") { - $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + $env:PATH = @("C:\msys64\usr\bin", "$env:PATH") -join ";" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? + $params = -join @( + "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? Remove-From-Path ".*msys64.*" # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM @@ -186,7 +231,7 @@ if ($env:COMPILER -ne "MSVC") { $R_CMD_CHECK_DIR = "tmp-r-cmd-check" New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null - cd "C:\$R_CMD_CHECK_DIR\" + Set-Location "C:\$R_CMD_CHECK_DIR\" } Write-Output "Running R CMD check" @@ -196,7 +241,11 @@ if ($env:COMPILER -ne "MSVC") { } else { $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + $params = -join ( + "result <- processx::run(command = 'R.exe', args = $check_args, ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; $check_succeeded = $? Write-Output "R CMD check build logs:" $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" @@ -206,10 +255,9 @@ if ($env:COMPILER -ne "MSVC") { Write-Output "Looking for issues with R CMD check results" if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { - echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" + Write-Output "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" Assert-Output $False } - } else { $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? 
@@ -219,7 +267,7 @@ if ($env:COMPILER -ne "MSVC") { Assert-Output $install_succeeded # some errors are not raised above, but can be found in the logs if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { - echo "ERRORs have been found installing lightgbm" + Write-Output "ERRORs have been found installing lightgbm" Assert-Output $False } } @@ -229,7 +277,11 @@ if ($env:TOOLCHAIN -ne "MSVC") { $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" $checks_cnt = $checks.Matches.length } else { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + $checksParams = @{ + Path = "${INSTALL_LOG_FILE_NAME}" + Pattern = "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + } + $checks = Select-String @checksParams $checks_cnt = $checks.Matches.length } if ($checks_cnt -eq 0) { @@ -299,7 +351,7 @@ if ($env:R_BUILD_TYPE -eq "cmake") { if ($env:COMPILER -eq "MSVC") { Write-Output "Running tests with testthat.R" - cd R-package/tests + Set-Location R-package/tests # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, # because something about the interaction between Invoke-R-Code-Redirect-Stderr # and testthat results in failing tests not exiting with a non-0 exit code. diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index f3015ae7d180..264c13961aff 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -31,11 +31,19 @@ if ($env:TASK -eq "cpp-tests") { if ($env:TASK -eq "swig") { $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" + $params = @{ + Uri = "https://sourceforge.net/projects/swig/files/latest/download" + OutFile = "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip" + UserAgent = "curl" + } + Invoke-WebRequest @params Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? + [System.IO.Compression.ZipFile]::ExtractToDirectory( + "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", + "$env:BUILD_SOURCESDIRECTORY/swig" + ) ; Assert-Output $? $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" - $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH + $env:PATH = @("$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder", "$env:PATH") -join ";" $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? Write-Output "CMake build logs:" @@ -68,30 +76,30 @@ if ($env:PYTHON_VERSION -eq "3.7") { $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } -conda create ` - -y ` - -n $env:CONDA_ENV ` - --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? +$condaParams = @( + "-y", + "-n", "$env:CONDA_ENV", + "--file", "$env:CONDA_REQUIREMENT_FILE", + "python=$env:PYTHON_VERSION[build=*cpython]" +) +conda create @condaParams ; Assert-Output $? 
if ($env:TASK -ne "bdist") { conda activate $env:CONDA_ENV } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location "$env:BUILD_SOURCESDIRECTORY" if ($env:TASK -eq "regular") { cmake -B build -S . -A x64 ; Assert-Output $? cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? sh ./build-python.sh install --precompile ; Assert-Output $? - cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY -} -elseif ($env:TASK -eq "sdist") { + cp ./Release/lib_lightgbm.dll "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" + cp ./Release/lightgbm.exe "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" +} elseif ($env:TASK -eq "sdist") { sh ./build-python.sh sdist ; Assert-Output $? sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? -} -elseif ($env:TASK -eq "bdist") { + Set-Location dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? +} elseif ($env:TASK -eq "bdist") { # Import the Chocolatey profile module so that the RefreshEnv command # invoked below properly updates the current PowerShell session environment. $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" @@ -104,8 +112,8 @@ elseif ($env:TASK -eq "bdist") { conda activate $env:CONDA_ENV sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? - cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY + Set-Location dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? + cp @(Get-ChildItem *py3-none-win_amd64.whl) "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { if ($env:COMPILER -eq "MINGW") { sh ./build-python.sh install --mingw ; Assert-Output $? @@ -116,9 +124,9 @@ elseif ($env:TASK -eq "bdist") { if (($env:TASK -eq "sdist") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { # cannot test C API with "sdist" task - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" + $tests = "$env:BUILD_SOURCESDIRECTORY/tests/python_package_test" } else { - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" + $tests = "$env:BUILD_SOURCESDIRECTORY/tests" } if ($env:TASK -eq "bdist") { # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py @@ -128,15 +136,30 @@ if ($env:TASK -eq "bdist") { pytest $tests ; Assert-Output $? 
if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide" @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode + # Prevent interactive window mode + (Get-Content "plot_example.py").replace( + 'graph.render(view=True)', + 'graph.render(view=False)' + ) | Set-Content "plot_example.py" conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" + # Run all examples foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file + @( + "import sys, warnings", + -join @( + "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: ", + "sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))" + ) + ) + (Get-Content $file) | Set-Content $file python $file ; Assert-Output $? - } # run all examples - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks - (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks + } + # Run all notebooks + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks" + (Get-Content "interactive_plot_example.ipynb").replace( + 'INTERACTIVE = False', + 'assert False, \"Interactive mode disabled\"' + ) | Set-Content "interactive_plot_example.ipynb" + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? 
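The example-patching just above is a plain read-transform-write round trip: `Get-Content` loads a file as an array of lines, the array is modified in memory, and `Set-Content` writes it back. A self-contained sketch on a throwaway file (file name and contents are invented for illustration):

    # Create a throwaway script to edit.
    Set-Content -Path "demo.py" -Value "graph.render(view=True)"

    # Rewrite one call site on every line, then write the file back.
    (Get-Content "demo.py").replace(
        'graph.render(view=True)',
        'graph.render(view=False)'
    ) | Set-Content "demo.py"

    # Prepend lines by concatenating an array with the file's contents.
    @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "demo.py") | Set-Content "demo.py"
    Get-Content "demo.py"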
} diff --git a/.ci/test.sh b/.ci/test.sh index 45ee65629744..cc8831f94c09 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -100,7 +100,7 @@ fi if [[ $TASK == "lint" ]]; then pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" echo "Linting PowerShell code" - pwsh -file "./.ci/lint-powershell.ps1" || : + pwsh -file ./.ci/lint-powershell.ps1 || exit 1 conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'biome>=1.9.3' \ From 5151fe85f08e5dccff7d48242dddace51f9c8ede Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Nov 2024 06:04:38 -0600 Subject: [PATCH 10/27] [ci] [R-package] re-enable 'rchk' checks (#6713) * intentionally miss an unprotect() * re-enable rchk * grep for errors * restore all CI --- .github/workflows/r_package.yml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 1758583ad8e4..8811f53b61c0 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -274,6 +274,7 @@ jobs: - clang19 - gcc14 - intel + - rchk runs-on: ubuntu-latest container: ghcr.io/r-hub/containers/${{ matrix.image }}:latest steps: @@ -311,8 +312,32 @@ jobs: - name: Install packages and run tests shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh + + # 'rchk' isn't run through 'R CMD check', use the approach documented at + # https://r-hub.github.io/containers/local.html + if [[ "${{ matrix.image }}" =~ "rchk" ]]; then + r-check "$(pwd)" \ + | tee ./rchk-logs.txt 2>&1 + + # the '-v' exceptions below are from R/rchk itself and not LightGBM: + # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 + if grep -E '\[PB\]|ERROR' ./rchk-logs.txt \ + | grep -v 'too many states' \ + > /dev/null; \ + then + echo "rchk found issues" + exit 1 + else + echo "rchk did not find any issues" + exit 0 + fi + fi + + # 'testthat' is not needed by 'rchk', so avoid installing it until here + Rscript -e "install.packages('testthat', repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + if [[ "${{ matrix.image }}" =~ "clang" ]]; then # allowing the following NOTEs (produced by default in the clang images): # From 4531ff548d43a8c7a35477b379f840e587cc2719 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 14 Nov 2024 20:35:16 -0600 Subject: [PATCH 11/27] [python-package] adapt to scikit-learn 1.6 testing changes, pin more packages in R 3.6 CI jobs (#6718) --- .ci/install-old-r-packages.R | 79 +++++++++++++++++++++++ .ci/test-r-package.sh | 4 +- python-package/lightgbm/compat.py | 10 +++ python-package/lightgbm/sklearn.py | 13 +++- tests/python_package_test/test_sklearn.py | 38 +++++++++-- 5 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 .ci/install-old-r-packages.R diff --git a/.ci/install-old-r-packages.R b/.ci/install-old-r-packages.R new file mode 100644 index 000000000000..e402c4d5ca12 --- /dev/null +++ b/.ci/install-old-r-packages.R @@ -0,0 +1,79 @@ +# [description] +# +# Installs a pinned set of packages that worked together +# as of the last R 3.6 release. 
+# + +.install_packages <- function(packages) { + install.packages( # nolint: undesirable_function + pkgs = paste( # nolint: paste + "https://cran.r-project.org/src/contrib/Archive" + , packages + , sep = "/" + ) + , dependencies = FALSE + , lib = Sys.getenv("R_LIBS") + , repos = NULL + ) +} + +# when confronted with a bunch of URLs like this, install.packages() sometimes +# struggles to determine install order... so install packages in batches here, +# starting from the root of the dependency graph and working up + +# there was only a single release of {praise}, so there is no contrib/Archive URL for it +install.packages( # nolint: undesirable_function + pkgs = "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz" + , dependencies = FALSE + , lib = Sys.getenv("R_LIBS") + , repos = NULL +) + +.install_packages(c( + "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path + , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path + , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path + , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path + , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path + , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path + , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path + , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path + , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path + , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path + , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path + , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path + , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path + , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path + , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path + , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path + , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path + , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path + , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path + , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path + , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path + , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path + , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path + , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path + , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path + , "testthat/testthat_3.2.1.tar.gz" # nolint: non_portable_path +)) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index ae205213d787..a076fab0186c 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -108,10 +108,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6 +# fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6 # "Warning: dependency ‘lattice’ is not available" if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 
'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')" + Rscript --vanilla ./.ci/install-old-r-packages.R else # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. # This should be unnecessary on R >=4.4.0 diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 96dee6522572..0b9444b0ecbf 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -14,6 +14,14 @@ from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import assert_all_finite, check_array, check_X_y + # sklearn.utils Tags types can be imported unconditionally once + # lightgbm's minimum scikit-learn version is 1.6 or higher + try: + from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags + from sklearn.utils import RegressorTags as _sklearn_RegressorTags + except ImportError: + _sklearn_ClassifierTags = None + _sklearn_RegressorTags = None try: from sklearn.exceptions import NotFittedError from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold @@ -140,6 +148,8 @@ class _LGBMRegressorBase: # type: ignore _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None _LGBMValidateData = None + _sklearn_ClassifierTags = None + _sklearn_RegressorTags = None _sklearn_version = None # additional scikit-learn imports only for type hints diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index c4d1200e99e4..614e3c3cbe7f 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -40,6 +40,8 @@ _LGBMModelBase, _LGBMRegressorBase, _LGBMValidateData, + _sklearn_ClassifierTags, + _sklearn_RegressorTags, _sklearn_version, dt_DataTable, pd_DataFrame, @@ -703,7 +705,6 @@ def _update_sklearn_tags_from_dict( tags.input_tags.allow_nan = tags_dict["allow_nan"] tags.input_tags.sparse = "sparse" in tags_dict["X_types"] tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"] - tags._xfail_checks = tags_dict["_xfail_checks"] return tags def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: @@ -1291,7 +1292,10 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + tags = LGBMModel.__sklearn_tags__(self) + tags.estimator_type = "regressor" + tags.regressor_tags = _sklearn_RegressorTags(multi_label=False) + return tags def fit( # type: ignore[override] self, @@ -1350,7 +1354,10 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + tags = LGBMModel.__sklearn_tags__(self) + tags.estimator_type = "classifier" + tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False) + return tags def fit( # type: ignore[override] self, diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 6eca66ff20d3..d187e9df5a9f 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -17,11 +17,18 @@ from sklearn.metrics import accuracy_score, log_loss, mean_squared_error, r2_score from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split from sklearn.multioutput import ClassifierChain, MultiOutputClassifier, MultiOutputRegressor, RegressorChain -from sklearn.utils.estimator_checks import parametrize_with_checks +from 
sklearn.utils.estimator_checks import parametrize_with_checks as sklearn_parametrize_with_checks from sklearn.utils.validation import check_is_fitted import lightgbm as lgb -from lightgbm.compat import DATATABLE_INSTALLED, PANDAS_INSTALLED, dt_DataTable, pd_DataFrame, pd_Series +from lightgbm.compat import ( + DATATABLE_INSTALLED, + PANDAS_INSTALLED, + _sklearn_version, + dt_DataTable, + pd_DataFrame, + pd_Series, +) from .utils import ( assert_silent, @@ -35,6 +42,9 @@ softmax, ) +SKLEARN_MAJOR, SKLEARN_MINOR, *_ = _sklearn_version.split(".") +SKLEARN_VERSION_GTE_1_6 = (int(SKLEARN_MAJOR), int(SKLEARN_MINOR)) >= (1, 6) + decreasing_generator = itertools.count(0, -1) estimator_classes = (lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker) task_to_model_factory = { @@ -1432,7 +1442,28 @@ def test_getting_feature_names_in_pd_input(estimator_class): np.testing.assert_array_equal(model.feature_names_in_, X.columns) -@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()]) +# Starting with scikit-learn 1.6 (https://github.com/scikit-learn/scikit-learn/pull/30149), +# the only API for marking estimator tests as expected to fail is to pass a keyword argument +# to parametrize_with_checks(). That function didn't accept additional arguments in earlier +# versions. +# +# This block defines a patched version of parametrize_with_checks() so lightgbm's tests +# can be compatible with scikit-learn <1.6 and >=1.6. +# +# This should be removed once minimum supported scikit-learn version is at least 1.6. +if SKLEARN_VERSION_GTE_1_6: + parametrize_with_checks = sklearn_parametrize_with_checks +else: + + def parametrize_with_checks(estimator, *args, **kwargs): + return sklearn_parametrize_with_checks(estimator) + + +def _get_expected_failed_tests(estimator): + return estimator._more_tags()["_xfail_checks"] + + +@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()], expected_failed_checks=_get_expected_failed_tests) def test_sklearn_integration(estimator, check): estimator.set_params(min_child_samples=1, min_data_in_bin=1) check(estimator) @@ -1457,7 +1488,6 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True - assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"] @pytest.mark.parametrize("task", all_tasks) From 83c0ff3de1925b0e2d4831a9ccb6ffc196aa795b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 15 Nov 2024 06:30:23 -0600 Subject: [PATCH 12/27] [docs] add note about pyodide support (#6715) * [docs] add note about pyodide support * Update README.md Co-authored-by: Nikita Titov --------- Co-authored-by: Nikita Titov --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f151c9db2ebe..39108559e8bc 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,8 @@ lightgbm-transform (feature transformation binding): https://github.com/microsof `postgresml` (LightGBM training and prediction in SQL, via a Postgres extension): https://github.com/postgresml/postgresml +`pyodide` (run `lightgbm` Python-package in a web browser): https://github.com/pyodide/pyodide + `vaex-ml` (Python DataFrame library with its own interface to LightGBM): https://github.com/vaexio/vaex Support From 27b00d74169ac7756c48d7b6878d66fa5d678530 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 29 Nov 2024 23:34:39 -0600 Subject: [PATCH 13/27] [ci] [python-package] 
[R-package] adapt to scikit-learn check_sample_weight_equivalence changes, stop testing against R 3.6 on Linux (#6733) --- .ci/install-old-r-packages.R | 79 ------------------------------ .ci/test-r-package.sh | 21 ++------ .github/workflows/r_package.yml | 19 +------ python-package/lightgbm/sklearn.py | 18 ++++--- 4 files changed, 18 insertions(+), 119 deletions(-) delete mode 100644 .ci/install-old-r-packages.R diff --git a/.ci/install-old-r-packages.R b/.ci/install-old-r-packages.R deleted file mode 100644 index e402c4d5ca12..000000000000 --- a/.ci/install-old-r-packages.R +++ /dev/null @@ -1,79 +0,0 @@ -# [description] -# -# Installs a pinned set of packages that worked together -# as of the last R 3.6 release. -# - -.install_packages <- function(packages) { - install.packages( # nolint: undesirable_function - pkgs = paste( # nolint: paste - "https://cran.r-project.org/src/contrib/Archive" - , packages - , sep = "/" - ) - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL - ) -} - -# when confronted with a bunch of URLs like this, install.packages() sometimes -# struggles to determine install order... so install packages in batches here, -# starting from the root of the dependency graph and working up - -# there was only a single release of {praise}, so there is no contrib/Archive URL for it -install.packages( # nolint: undesirable_function - pkgs = "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz" - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL -) - -.install_packages(c( - "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path - , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path - , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path - , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path - , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path - , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path - , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path - , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path - , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path - , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path - , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path - , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path - , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path - , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path - , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path - , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path - , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path - , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path - , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path - , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path - , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path - , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path - , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path - , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path - , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path - , "testthat/testthat_3.2.1.tar.gz" # nolint: 
non_portable_path -)) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index a076fab0186c..2e414ec0d282 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -20,12 +20,7 @@ fi # Get details needed for installing R components R_MAJOR_VERSION="${R_VERSION%.*}" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - export R_MAC_VERSION=3.6.3 - export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/R-${R_MAC_VERSION}.nn.pkg - export R_LINUX_VERSION="3.6.3-1bionic" - export R_APT_REPO="bionic-cran35/" -elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then +if [[ "${R_MAJOR_VERSION}" == "4" ]]; then export R_MAC_VERSION=4.3.1 export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-${ARCH}/base/R-${R_MAC_VERSION}-${ARCH}.pkg export R_LINUX_VERSION="4.3.1-1.2204.0" @@ -108,16 +103,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6 -# "Warning: dependency ‘lattice’ is not available" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla ./.ci/install-old-r-packages.R -else - # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. - # This should be unnecessary on R >=4.4.0 - # ref: https://github.com/microsoft/LightGBM/issues/6433 - Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" -fi +# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. +# This should be unnecessary on R >=4.4.0 +# ref: https://github.com/microsoft/LightGBM/issues/6433 +Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" # manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0 # ref: https://github.com/microsoft/LightGBM/issues/6433 diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 8811f53b61c0..66e05a18ba1f 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -14,10 +14,6 @@ concurrency: cancel-in-progress: true env: - # https://github.com/actions/checkout/issues/1590#issuecomment-2207052044 - # - # this could be removed (hopefully) when R 3.6 support is removed - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # in CMake-driven builds, parallelize compilation CMAKE_BUILD_PARALLEL_LEVEL: 4 # on Debian-based images, avoid interactive prompts @@ -48,12 +44,6 @@ jobs: ################ # CMake builds # ################ - - os: ubuntu-latest - task: r-package - compiler: gcc - r_version: 3.6 - build_type: cmake - container: 'ubuntu:18.04' - os: ubuntu-latest task: r-package compiler: gcc @@ -174,19 +164,12 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true - name: Install pandoc uses: r-lib/actions/setup-pandoc@v2 - if: matrix.container != 'ubuntu:18.04' - # R 3.6 binary isn't easily available on Ubuntu 18.04, - # but setup-pandoc>=2.7.1 is uses a too-new glibc for it. 
- # ref: https://github.com/microsoft/LightGBM/issues/6298 - - name: Install pandoc - uses: r-lib/actions/setup-pandoc@v2.6.0 - if: matrix.container == 'ubuntu:18.04' - name: Install tinytex if: startsWith(matrix.os, 'windows') uses: r-lib/actions/setup-tinytex@v2 diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 614e3c3cbe7f..d730b66c3556 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -673,6 +673,15 @@ def __init__( # is >=1.6. # ref: https://github.com/microsoft/LightGBM/pull/6651 def _more_tags(self) -> Dict[str, Any]: + check_sample_weight_str = ( + "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " + "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " + "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " + "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." + ) + # "check_sample_weight_equivalence" can be removed when lightgbm's + # minimum supported scikit-learn version is at least 1.6 + # ref: https://github.com/scikit-learn/scikit-learn/pull/30137 return { "allow_nan": True, "X_types": ["2darray", "sparse", "1dlabels"], @@ -680,12 +689,9 @@ def _more_tags(self) -> Dict[str, Any]: "check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes " "cannot be set in __init__: " "(see https://github.com/microsoft/LightGBM/issues/2628)", - "check_sample_weight_equivalence": ( - "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " - "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " - "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " - "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." 
- ), + "check_sample_weight_equivalence": check_sample_weight_str, + "check_sample_weight_equivalence_on_dense_data": check_sample_weight_str, + "check_sample_weight_equivalence_on_sparse_data": check_sample_weight_str, }, } From 784f38415d4dd08ccefe2a536d08971020672cca Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Sun, 1 Dec 2024 05:51:56 +0100 Subject: [PATCH 14/27] [ci] Introduce `typos` pre-commit hook (#6564) Co-authored-by: Nikita Titov --- .ci/test-r-package-windows.ps1 | 2 +- .github/workflows/lock.yml | 2 +- .pre-commit-config.yaml | 8 ++++- .typos.toml | 21 +++++++++++ CMakeLists.txt | 2 +- R-package/R/lgb.Booster.R | 4 +-- R-package/R/lgb.importance.R | 2 +- R-package/R/lgb.model.dt.tree.R | 2 +- R-package/R/lightgbm.R | 2 +- R-package/demo/cross_validation.R | 2 +- R-package/demo/early_stopping.R | 2 +- R-package/man/lgb.configure_fast_predict.Rd | 4 +-- R-package/man/lgb.importance.Rd | 2 +- R-package/man/lgb.model.dt.tree.Rd | 2 +- R-package/man/lightgbm.Rd | 2 +- R-package/tests/testthat/test_basic.R | 4 +-- .../tests/testthat/test_custom_objective.R | 2 +- .../tests/testthat/test_lgb.interprete.R | 2 +- .../testthat/test_lgb.plot.interpretation.R | 4 +-- cmake/Sanitizer.cmake | 2 +- docker/README.md | 4 +-- docs/Parameters.rst | 2 +- docs/_static/js/script.js | 4 +-- examples/lambdarank/train.conf | 2 +- examples/regression/train.conf | 10 +++--- include/LightGBM/cuda/cuda_algorithms.hpp | 6 ++-- include/LightGBM/dataset.h | 2 +- include/LightGBM/utils/common.h | 4 +-- include/LightGBM/utils/random.h | 4 +-- python-package/lightgbm/basic.py | 2 +- python-package/lightgbm/dask.py | 2 +- src/boosting/bagging.hpp | 10 +++--- src/boosting/gbdt_model_text.cpp | 6 ++-- src/io/metadata.cpp | 6 ++-- src/network/linker_topo.cpp | 4 +-- src/objective/rank_objective.hpp | 2 +- .../cuda/cuda_best_split_finder.cpp | 2 +- src/treelearner/cuda/cuda_data_partition.cu | 18 +++++----- src/treelearner/cuda/cuda_data_partition.hpp | 18 +++++----- .../cuda/cuda_histogram_constructor.cpp | 2 +- .../cuda/cuda_histogram_constructor.hpp | 2 +- src/treelearner/cuda/cuda_leaf_splits.cpp | 8 ++--- src/treelearner/cuda/cuda_leaf_splits.cu | 16 ++++----- src/treelearner/cuda/cuda_leaf_splits.hpp | 6 ++-- .../data_parallel_tree_learner.cpp | 6 ++-- src/treelearner/feature_histogram.hpp | 6 ++-- src/treelearner/gpu_tree_learner.cpp | 2 +- .../kernels/histogram_16_64_256.cu | 4 +-- src/treelearner/ocl/histogram16.cl | 4 +-- src/treelearner/ocl/histogram256.cl | 2 +- src/treelearner/ocl/histogram64.cl | 2 +- src/treelearner/parallel_tree_learner.h | 8 ++--- src/treelearner/serial_tree_learner.cpp | 14 ++++---- .../voting_parallel_tree_learner.cpp | 12 +++---- tests/cpp_tests/test_chunked_array.cpp | 8 ++--- tests/cpp_tests/test_stream.cpp | 36 +++++++++---------- tests/python_package_test/test_dask.py | 2 +- tests/python_package_test/test_engine.py | 2 +- 58 files changed, 175 insertions(+), 148 deletions(-) create mode 100644 .typos.toml diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 1ce698a49c72..a3f524b60be7 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -171,7 +171,7 @@ Write-Output "Done installing Rtools" Write-Output "Installing CMake" Add-Type -AssemblyName System.IO.Compression.FileSystem [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $? 
-# Remove old CMake shiped with RTools +# Remove old CMake shipped with RTools Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml index 4efe658b7f45..195fd5f1c8f1 100644 --- a/.github/workflows/lock.yml +++ b/.github/workflows/lock.yml @@ -39,7 +39,7 @@ jobs: This pull request has been automatically locked since there has not been any recent activity since it was closed. To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues including a reference to this. - # what shoulld the locking status be? + # what should the locking status be? issue-lock-reason: 'resolved' pr-lock-reason: 'resolved' process-only: 'issues, prs' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e5e5dd8e9d9..b334db19b8e7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,4 +38,10 @@ repos: - repo: https://github.com/shellcheck-py/shellcheck-py rev: v0.10.0.1 hooks: - - id: shellcheck + - id: shellcheck + - repo: https://github.com/crate-ci/typos + rev: v1.23.2 + hooks: + - id: typos + args: ["--force-exclude"] + exclude: (\.gitignore$)|(^\.editorconfig$) diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 000000000000..6dc2c2c97529 --- /dev/null +++ b/.typos.toml @@ -0,0 +1,21 @@ +default.extend-ignore-re = [ + "/Ot", + "mis-alignment", + "mis-spelled", + "posix-seh-rt", +] + +[default.extend-words] +MAPE = "MAPE" +datas = "datas" +interprete = "interprete" +mape = "mape" +splitted = "splitted" + +[default.extend-identifiers] +ERRORs = "ERRORs" +GAM = "GAM" +ND24s = "ND24s" +WARNINGs = "WARNINGs" +fullset = "fullset" +thess = "thess" diff --git a/CMakeLists.txt b/CMakeLists.txt index 183ef62bd68e..4f57cf9622e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ option(USE_SWIG "Enable SWIG to generate Java API" OFF) option(USE_TIMETAG "Set to ON to output time costs" OFF) option(USE_CUDA "Enable CUDA-accelerated training " OFF) option(USE_DEBUG "Set to ON for Debug mode" OFF) -option(USE_SANITIZER "Use santizer flags" OFF) +option(USE_SANITIZER "Use sanitizer flags" OFF) set( ENABLED_SANITIZERS "address" "leak" "undefined" diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index a13516ff6569..85a91b1ce058 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -1114,7 +1114,7 @@ predict.lgb.Booster <- function(object, #' #' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} #' will cause it to ignore the fast-predict configuration and take the slow route instead -#' (but be aware that an existing configuration might not always be overriden by supplying +#' (but be aware that an existing configuration might not always be overridden by supplying #' different parameters or prediction type, so make sure to check that the output is what #' was expected when a prediction is to be made on a single row for something different than #' what is configured). @@ -1128,7 +1128,7 @@ predict.lgb.Booster <- function(object, #' and as such, this function will produce an error if passing \code{csr=TRUE} and #' \code{type = "contrib"} together. #' @inheritParams lgb_predict_shared_params -#' @param model LighGBM model object (class \code{lgb.Booster}). +#' @param model LightGBM model object (class \code{lgb.Booster}). #' #' \bold{The object will be modified in-place}. 
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs. diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 7c76131f4f53..d60507cf00d4 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -9,7 +9,7 @@ #' \item{\code{Feature}: Feature names in the model.} #' \item{\code{Gain}: The total gain of this feature's splits.} #' \item{\code{Cover}: The number of observation related to this feature.} -#' \item{\code{Frequency}: The number of times a feature splited in trees.} +#' \item{\code{Frequency}: The number of times a feature split in trees.} #' } #' #' @examples diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index db4ef955f866..ac1b2f9aaf14 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -10,7 +10,7 @@ #' \emph{New in version 4.4.0} #' #' @return -#' A \code{data.table} with detailed information about model trees' nodes and leafs. +#' A \code{data.table} with detailed information about model trees' nodes and leaves. #' #' The columns of the \code{data.table} are: #' diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index efa593ffe12f..6cb4eebd8baf 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -139,7 +139,7 @@ NULL #' system, but be aware that getting the number of cores detected correctly requires package #' \code{RhpcBLASctl} to be installed. #' -#' This parameter gets overriden by \code{num_threads} and its aliases under \code{params} +#' This parameter gets overridden by \code{num_threads} and its aliases under \code{params} #' if passed there. #' #' \emph{New in version 4.0.0} diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 0324f83f2da9..9f74ef7f4b2a 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -51,7 +51,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 6ca214c5ac7b..4435dd1b09b6 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -29,7 +29,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # The built-in evaluation error assumes input is after logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index e02600451df5..9cd4339bdced 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ 
b/R-package/man/lgb.configure_fast_predict.Rd @@ -14,7 +14,7 @@ lgb.configure_fast_predict( ) } \arguments{ -\item{model}{LighGBM model object (class \code{lgb.Booster}). +\item{model}{LightGBM model object (class \code{lgb.Booster}). \bold{The object will be modified in-place}.} @@ -98,7 +98,7 @@ Calling this function multiple times with different parameters might not overrid Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} will cause it to ignore the fast-predict configuration and take the slow route instead - (but be aware that an existing configuration might not always be overriden by supplying + (but be aware that an existing configuration might not always be overridden by supplying different parameters or prediction type, so make sure to check that the output is what was expected when a prediction is to be made on a single row for something different than what is configured). diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 79cb82f5d8ef..5099643112be 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -17,7 +17,7 @@ For a tree model, a \code{data.table} with the following columns: \item{\code{Feature}: Feature names in the model.} \item{\code{Gain}: The total gain of this feature's splits.} \item{\code{Cover}: The number of observation related to this feature.} - \item{\code{Frequency}: The number of times a feature splited in trees.} + \item{\code{Frequency}: The number of times a feature split in trees.} } } \description{ diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index ecfee17332f5..df36b6a94f42 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -18,7 +18,7 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L) \emph{New in version 4.4.0}} } \value{ -A \code{data.table} with detailed information about model trees' nodes and leafs. +A \code{data.table} with detailed information about model trees' nodes and leaves. The columns of the \code{data.table} are: diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 90cb3166bf5c..376a6d03a6b1 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -93,7 +93,7 @@ set to the iteration number of the best iteration.} system, but be aware that getting the number of cores detected correctly requires package \code{RhpcBLASctl} to be installed. - This parameter gets overriden by \code{num_threads} and its aliases under \code{params} + This parameter gets overridden by \code{num_threads} and its aliases under \code{params} if passed there. \emph{New in version 4.0.0}} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index c734816b4038..7310815c4a6d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -9,7 +9,7 @@ set.seed(708L) # to an accumulator then returns the current value. 
# This is used to mock the situation where an evaluation # metric increases every iteration -ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACCUMULATOR" assign(x = ACCUMULATOR_NAME, value = 0.0, envir = .GlobalEnv) .increasing_metric <- function(preds, dtrain) { @@ -1777,7 +1777,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th , early_stopping_rounds + 1L ) - # Booster should understand thatt all three of these metrics should be minimized + # Booster should understand that all three of these metrics should be minimized eval_info <- bst$.__enclos_env__$private$get_eval_info() expect_identical(eval_info, c("mape", "rmse", "l1")) expect_identical( diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 2c10b9d571dc..a1baf0067c4a 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -14,7 +14,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- get_field(dtrain, "label") diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index 322a80a55bc5..cfcd1c942f31 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.intereprete works as expected for binary classification", { +test_that("lgb.interprete works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/tests/testthat/test_lgb.plot.interpretation.R b/R-package/tests/testthat/test_lgb.plot.interpretation.R index 6cba9927942a..e8a021fc7237 100644 --- a/R-package/tests/testthat/test_lgb.plot.interpretation.R +++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.plot.interepretation works as expected for binary classification", { +test_that("lgb.plot.interpretation works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -57,7 +57,7 @@ test_that("lgb.plot.interepretation works as expected for binary classification" expect_null(plot_res) }) -test_that("lgb.plot.interepretation works as expected for multiclass classification", { +test_that("lgb.plot.interpretation works as expected for multiclass classification", { data(iris) # We must convert factors to numeric diff --git a/cmake/Sanitizer.cmake b/cmake/Sanitizer.cmake index a3768effac0d..f99048476d8b 100644 --- a/cmake/Sanitizer.cmake +++ b/cmake/Sanitizer.cmake @@ -18,7 +18,7 @@ macro(enable_sanitizer sanitizer) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") else() - message(FATAL_ERROR "Santizer ${sanitizer} not supported.") + message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") endif() endmacro() diff --git 
a/docker/README.md b/docker/README.md index dfedc2f4e3f1..e68346545ccf 100644 --- a/docker/README.md +++ b/docker/README.md @@ -55,7 +55,7 @@ After this runs, a LightGBM model can be found at `LightGBM-CLI-model.txt`. For more details on how to configure and use the LightGBM CLI, see https://lightgbm.readthedocs.io/en/latest/Quick-Start.html. -## Running the Python-package Сontainer +## Running the Python-package Container Build an image with the LightGBM Python-package installed. @@ -114,7 +114,7 @@ docker run \ python ``` -## Running the R-package Сontainer +## Running the R-package Container Build an image with the LightGBM R-package installed. diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 1f80a13d5731..b44d90ecec10 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -35,7 +35,7 @@ For example, in Python: .. code-block:: python - # use learning rate of 0.07, becase 'learning_rate' + # use learning rate of 0.07, because 'learning_rate' # is the primary parameter name lgb.train( params={ diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 3f129501e06f..c4717b8a0ee5 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -17,7 +17,7 @@ $(() => { $( '', ).appendTo("body"); - const collapsable = [ + const collapsible = [ "#build-threadless-version-not-recommended", "#build-mpi-version", "#build-gpu-version", @@ -25,7 +25,7 @@ $(() => { "#build-java-wrapper", "#build-c-unit-tests", ]; - $.each(collapsable, (_, val) => { + $.each(collapsible, (_, val) => { const header = `${val} > :header:first`; const content = `${val} :not(:header:first)`; $(header).addClass("closed"); diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index 2aa2113b40d4..f007dcd6fe66 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -64,7 +64,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. # num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration diff --git a/examples/regression/train.conf b/examples/regression/train.conf index cd910af61dcf..992bc6c9ab53 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -20,7 +20,7 @@ objective = regression # binary_error metric = l2 -# frequence for metric output +# frequency for metric output metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric @@ -36,12 +36,12 @@ max_bin = 255 # forcedbins_filename = forced_bins.json # training data -# if exsting weight file, should name to "regression.train.weight" +# if existing weight file, should name to "regression.train.weight" # alias: train_data, train data = regression.train # validation data, support multi validation data, separated by ',' -# if exsting weight file, should name to "regression.test.weight" +# if existing weight file, should name to "regression.test.weight" # alias: valid, test, test_data, valid_data = regression.test @@ -62,7 +62,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. 
# num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration @@ -72,7 +72,7 @@ feature_fraction = 0.9 # Support bagging (data sub-sample), will perform bagging every 5 iterations bagging_freq = 5 -# Bagging farction, will random select 80% data on bagging +# Bagging fraction, will random select 80% data on bagging # alias: sub_row bagging_fraction = 0.8 diff --git a/include/LightGBM/cuda/cuda_algorithms.hpp b/include/LightGBM/cuda/cuda_algorithms.hpp index f79fc57e4f42..abda07b1582f 100644 --- a/include/LightGBM/cuda/cuda_algorithms.hpp +++ b/include/LightGBM/cuda/cuda_algorithms.hpp @@ -115,7 +115,7 @@ __device__ __forceinline__ T ShuffleReduceSumWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceSum(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -145,7 +145,7 @@ __device__ __forceinline__ T ShuffleReduceMaxWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceMax(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -196,7 +196,7 @@ __device__ __forceinline__ T ShuffleReduceMinWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceMin(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 220a1f9f009c..ef214b7cd89d 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -376,7 +376,7 @@ class Metadata { std::vector<data_size_t> query_boundaries_; /*! \brief Query weights */ std::vector<label_t> query_weights_; - /*! \brief Number of querys */ + /*! \brief Number of queries */ data_size_t num_queries_; /*! \brief Number of Initial score, used to check correct weight file */ int64_t num_init_score_; diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 6c3ebf5d0096..67bc07b0ecd5 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -925,11 +925,11 @@ class AlignmentAllocator { inline ~AlignmentAllocator() throw() {} - inline pointer adress(reference r) { + inline pointer address(reference r) { return &r; } - inline const_pointer adress(const_reference r) const { + inline const_pointer address(const_reference r) const { return &r; } diff --git a/include/LightGBM/utils/random.h b/include/LightGBM/utils/random.h index 6f89f935b310..eb115ea96644 100644 --- a/include/LightGBM/utils/random.h +++ b/include/LightGBM/utils/random.h @@ -22,9 +22,9 @@ class Random { */ Random() { std::random_device rd; - auto genrator = std::mt19937(rd()); + auto generator = std::mt19937(rd()); std::uniform_int_distribution<int> distribution(0, x); - x = distribution(genrator); + x = distribution(generator); } /*!
* \brief Constructor, with specific seed diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index cf3723aadc63..99a690f38993 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -3525,7 +3525,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset": _log_warning(err_msg) self.feature_name = self.get_feature_name() _log_warning( - "Reseting categorical features.\n" + "Resetting categorical features.\n" "You can set new categorical features via ``set_categorical_feature`` method" ) self.categorical_feature = "auto" diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index e15979bc40db..dcdacba7366c 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -967,7 +967,7 @@ def _extract(items: List[Any], i: int) -> Any: out[i].append(part) # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix - # the code below is used instead to ensure that the sparse type is preserved during concatentation + # the code below is used instead to ensure that the sparse type is preserved during concatenation if isinstance(pred_meta, ss.csr_matrix): concat_fn = partial(ss.vstack, format="csr") elif isinstance(pred_meta, ss.csc_matrix): diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp index 7a66b5696425..451384e6850a 100644 --- a/src/boosting/bagging.hpp +++ b/src/boosting/bagging.hpp @@ -73,17 +73,17 @@ class BaggingSampleStrategy : public SampleStrategy { for (data_size_t i = start_index + 1; i < end_index; ++i) { sampled_query_boundaries_[i] += sampled_query_boundaries_[i - 1]; } - sampled_query_boundaires_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; }); for (int thread_index = 1; thread_index < num_blocks; ++thread_index) { - sampled_query_boundaires_thread_buffer_[thread_index] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } Threading::For(0, num_sampled_queries_ + 1, 128, [this](int thread_index, data_size_t start_index, data_size_t end_index) { if (thread_index > 0) { for (data_size_t i = start_index; i < end_index; ++i) { - sampled_query_boundaries_[i] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_[i] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } } }); @@ -171,7 +171,7 @@ class BaggingSampleStrategy : public SampleStrategy { } else { bagging_runner_.ReSize(num_queries_); sampled_query_boundaries_.resize(num_queries_ + 1, 0); - sampled_query_boundaires_thread_buffer_.resize(num_threads_, 0); + sampled_query_boundaries_thread_buffer_.resize(num_threads_, 0); bag_query_indices_.resize(num_data_); } bagging_rands_.clear(); @@ -280,7 +280,7 @@ class BaggingSampleStrategy : public SampleStrategy { /*! \brief query boundaries of the in-bag queries */ std::vector sampled_query_boundaries_; /*! \brief buffer for calculating sampled_query_boundaries_ */ - std::vector sampled_query_boundaires_thread_buffer_; + std::vector sampled_query_boundaries_thread_buffer_; /*! \brief in-bag query indices */ std::vector> bag_query_indices_; /*! 
\brief number of queries in the training dataset */ diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 27be5afe066e..e8b6dd2332ef 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -545,17 +545,17 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { } } else { std::vector<size_t> tree_sizes = CommonC::StringToArray<size_t>(key_vals["tree_sizes"].c_str(), ' '); - std::vector<size_t> tree_boundries(tree_sizes.size() + 1, 0); + std::vector<size_t> tree_boundaries(tree_sizes.size() + 1, 0); int num_trees = static_cast<int>(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { - tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i]; + tree_boundaries[i + 1] = tree_boundaries[i] + tree_sizes[i]; models_.emplace_back(nullptr); } OMP_INIT_EX(); #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_trees; ++i) { OMP_LOOP_EX_BEGIN(); - auto cur_p = p + tree_boundries[i]; + auto cur_p = p + tree_boundaries[i]; auto line_len = Common::GetLine(cur_p); std::string cur_line(cur_p, line_len); if (Common::StartsWith(cur_line, "Tree=")) { diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index f46e6d1c9f14..f6f07c434661 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -225,7 +225,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector(metadata.num_position_ids()); - // get boundries + // get boundaries query_boundaries_ = metadata.query_boundaries(); if (query_boundaries_ == nullptr) { Log::Fatal("Ranking tasks require query information"); } diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp index 95758542849c..e272ce744b1a 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.cpp +++ b/src/treelearner/cuda/cuda_best_split_finder.cpp @@ -120,7 +120,7 @@ void CUDABestSplitFinder::Init() { void CUDABestSplitFinder::InitCUDAFeatureMetaInfo() { AllocateCUDAMemory(&cuda_is_feature_used_bytree_, static_cast<size_t>(num_features_), __FILE__, __LINE__); - // intialize split find task information (a split find task is one pass through the histogram of a feature) + // initialize split find task information (a split find task is one pass through the histogram of a feature) num_tasks_ = 0; for (int inner_feature_index = 0; inner_feature_index < num_features_; ++inner_feature_index) { const uint32_t num_bin = feature_num_bins_[inner_feature_index]; diff --git a/src/treelearner/cuda/cuda_data_partition.cu b/src/treelearner/cuda/cuda_data_partition.cu index 3090b7a84176..4ca9d9279443 100644 --- a/src/treelearner/cuda/cuda_data_partition.cu +++ b/src/treelearner/cuda/cuda_data_partition.cu @@ -262,7 +262,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( } } -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -286,7 +286,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( template __global__ void GenDataToLeftBitVectorKernel( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, uint16_t* block_to_left_offset, data_size_t* block_to_left_offset_buffer, data_size_t* block_to_right_offset_buffer) { @@ -335,7 +335,7 @@ __global__ void GenDataToLeftBitVectorKernel( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( -
GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -363,7 +363,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -380,7 +380,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -396,7 +396,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const bool max_bin_to_left, const bool is_single_feature_in_column) { @@ -413,7 +413,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column) { if (!max_bin_to_left) { @@ -429,7 +429,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column) { if (!is_single_feature_in_column) { GenDataToLeftBitVectorKernel @@ -548,7 +548,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernel( #undef UpdateDataIndexToLeafIndexKernel_PARAMS #undef UpdateDataIndexToLeafIndex_ARGS -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #undef GenBitVector_ARGS template diff --git a/src/treelearner/cuda/cuda_data_partition.hpp b/src/treelearner/cuda/cuda_data_partition.hpp index f6bbab9b8c65..bfcce89af243 100644 --- a/src/treelearner/cuda/cuda_data_partition.hpp +++ b/src/treelearner/cuda/cuda_data_partition.hpp @@ -174,7 +174,7 @@ class CUDADataPartition { const int left_leaf_index, const int right_leaf_index); -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -187,7 +187,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -197,7 +197,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -206,7 +206,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -214,23 +214,23 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const 
bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column); -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #define UpdateDataIndexToLeafIndexKernel_PARAMS \ const BIN_TYPE* column_data, \ @@ -379,7 +379,7 @@ class CUDADataPartition { int* cuda_split_info_buffer_; // dataset information - /*! \brief number of data in training set, for intialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ + /*! \brief number of data in training set, for initialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ data_size_t* cuda_num_data_; diff --git a/src/treelearner/cuda/cuda_histogram_constructor.cpp b/src/treelearner/cuda/cuda_histogram_constructor.cpp index 659db2aad24c..9f42eadec6f7 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.cpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.cpp @@ -150,7 +150,7 @@ void CUDAHistogramConstructor::CalcConstructHistogramKernelDim( int* block_dim_y, const data_size_t num_data_in_smaller_leaf) { *block_dim_x = cuda_row_data_->max_num_column_per_partition(); - *block_dim_y = NUM_THRADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); + *block_dim_y = NUM_THREADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); *grid_dim_x = cuda_row_data_->num_feature_partitions(); *grid_dim_y = std::max(min_grid_dim_y_, ((num_data_in_smaller_leaf + NUM_DATA_PER_THREAD - 1) / NUM_DATA_PER_THREAD + (*block_dim_y) - 1) / (*block_dim_y)); diff --git a/src/treelearner/cuda/cuda_histogram_constructor.hpp b/src/treelearner/cuda/cuda_histogram_constructor.hpp index ddc78cb17d90..655029d23ba5 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.hpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.hpp @@ -19,7 +19,7 @@ #include "cuda_leaf_splits.hpp" #define NUM_DATA_PER_THREAD (400) -#define NUM_THRADS_PER_BLOCK (504) +#define NUM_THREADS_PER_BLOCK (504) #define NUM_FEATURE_PER_THREAD_GROUP (28) #define SUBTRACT_BLOCK_SIZE (1024) #define FIX_HISTOGRAM_SHARED_MEM_SIZE (1024) diff --git a/src/treelearner/cuda/cuda_leaf_splits.cpp b/src/treelearner/cuda/cuda_leaf_splits.cpp index 803d4674ee48..2bdd0d47fae1 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cpp +++ b/src/treelearner/cuda/cuda_leaf_splits.cpp @@ -16,7 +16,7 @@ num_data_(num_data) {} CUDALeafSplits::~CUDALeafSplits() {} void CUDALeafSplits::Init(const bool use_quantized_grad) { - num_blocks_init_from_gradients_ = (num_data_ + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data_ + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; // allocate more memory for sum reduction in CUDA // only the first element records the final sum @@ -44,7 +44,7 @@ void CUDALeafSplits::InitValues( cuda_hessians_ = cuda_hessians; cuda_sum_of_gradients_buffer_.SetValue(0); cuda_sum_of_hessians_buffer_.SetValue(0); - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, 
cuda_hist_in_leaf); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -59,7 +59,7 @@ void CUDALeafSplits::InitValues( const score_t* grad_scale, const score_t* hess_scale) { cuda_gradients_ = reinterpret_cast(cuda_gradients_and_hessians); cuda_hessians_ = nullptr; - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -67,7 +67,7 @@ void CUDALeafSplits::InitValues( void CUDALeafSplits::Resize(const data_size_t num_data) { num_data_ = num_data; - num_blocks_init_from_gradients_ = (num_data + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; cuda_sum_of_gradients_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_gradients_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); diff --git a/src/treelearner/cuda/cuda_leaf_splits.cu b/src/treelearner/cuda/cuda_leaf_splits.cu index ae505ecd55dd..0c796be9f20a 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cu +++ b/src/treelearner/cuda/cuda_leaf_splits.cu @@ -180,23 +180,23 @@ void CUDALeafSplits::LaunchInitValuesEmptyKernel() { InitValuesEmptyKernel<<<1, 1>>>(cuda_struct_.RawData()); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } else { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel2<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel2<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), @@ -208,7 +208,7 @@ void CUDALeafSplits::LaunchInitValuesKernal( SynchronizeCUDADevice(__FILE__, __LINE__); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, @@ -217,17 +217,17 @@ void CUDALeafSplits::LaunchInitValuesKernal( const score_t* grad_scale, const 
score_t* hess_scale) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } else { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel4<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel4<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), diff --git a/src/treelearner/cuda/cuda_leaf_splits.hpp b/src/treelearner/cuda/cuda_leaf_splits.hpp index c2635346098b..43a0492452bd 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.hpp +++ b/src/treelearner/cuda/cuda_leaf_splits.hpp @@ -13,7 +13,7 @@ #include #include -#define NUM_THRADS_PER_BLOCK_LEAF_SPLITS (1024) +#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024) #define NUM_DATA_THREAD_ADD_LEAF_SPLITS (6) namespace LightGBM { @@ -142,14 +142,14 @@ class CUDALeafSplits { private: void LaunchInitValuesEmptyKernel(); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 64c342e5b01d..670788118455 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -260,12 +260,12 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { if (smaller_leaf_num_bits <= 16) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_int16_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt16(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histogram()); } else { if (local_smaller_leaf_num_bits == 32) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt32(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histogram()); } else { this->smaller_leaf_histogram_array_[feature_index].CopyFromInt16ToInt32( input_buffer_.data() + buffer_write_start_pos_[feature_index]); @@ -274,7 +274,7 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { } else { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawData(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfHistgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfHistogram()); } } global_timer.Stop("DataParallelTreeLearner::ReduceHistogram::Copy"); diff --git 
a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 70dd0fb5436f..2d4abbd27af1 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -668,15 +668,15 @@ class FeatureHistogram { /*! * \brief Binary size of this histogram */ - int SizeOfHistgram() const { + int SizeOfHistogram() const { return (meta_->num_bin - meta_->offset) * kHistEntrySize; } - int SizeOfInt32Histgram() const { + int SizeOfInt32Histogram() const { return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize; } - int SizeOfInt16Histgram() const { + int SizeOfInt16Histogram() const { return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize; } diff --git a/src/treelearner/gpu_tree_learner.cpp b/src/treelearner/gpu_tree_learner.cpp index 7c6c811c3b45..1bf21d65ccc6 100644 --- a/src/treelearner/gpu_tree_learner.cpp +++ b/src/treelearner/gpu_tree_learner.cpp @@ -777,7 +777,7 @@ void GPUTreeLearner::ResetIsConstantHessian(bool is_constant_hessian) { void GPUTreeLearner::BeforeTrain() { #if GPU_DEBUG >= 2 - printf("Copying intial full gradients and hessians to device\n"); + printf("Copying initial full gradients and hessians to device\n"); #endif // Copy initial full hessians and gradients to GPU. // We start copying as early as possible, instead of at ConstructHistogram(). diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu index d778d650f722..59662fb19d55 100644 --- a/src/treelearner/kernels/histogram_16_64_256.cu +++ b/src/treelearner/kernels/histogram_16_64_256.cu @@ -508,7 +508,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4 for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) { // prefetch the next iteration variables - // we don't need bondary check because we have made the buffer large + // we don't need boundary check because we have made the buffer large int i_next = i + subglobal_size; #ifdef IGNORE_INDICES // we need to check to bounds here @@ -752,7 +752,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary?? // total size: 2 * 256 * size_of(float) = 2 KB // organization: each feature/grad/hessian is at a different bank, - // as indepedent of the feature value as possible + // as independent of the feature value as possible acc_type *gh_hist = reinterpret_cast(shared_array); // counter histogram diff --git a/src/treelearner/ocl/histogram16.cl b/src/treelearner/ocl/histogram16.cl index 21624ec9ee10..be590c20666b 100644 --- a/src/treelearner/ocl/histogram16.cl +++ b/src/treelearner/ocl/histogram16.cl @@ -8,7 +8,7 @@ #ifndef __OPENCL_VERSION__ // If we are including this file in C++, // the entire source file following (except the last #endif) will become -// a raw string literal. The extra ")" is just for mathcing parentheses +// a raw string literal. The extra ")" is just for matching parentheses // to make the editor happy. The extra ")" and extra endif will be skipped. // DO NOT add anything between here and the next #ifdef, otherwise you need // to modify the skip count at the end of this file. 
@@ -475,7 +475,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/ocl/histogram256.cl b/src/treelearner/ocl/histogram256.cl index 3351f9efa7c3..b5c049e1272d 100644 --- a/src/treelearner/ocl/histogram256.cl +++ b/src/treelearner/ocl/histogram256.cl @@ -387,7 +387,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base, const uint subglobal_tid = gtid - group_feature * subglobal_size; // extract feature mask, when a byte is set to 0, that feature is disabled #if ENABLE_ALL_FEATURES == 1 - // hopefully the compiler will propogate the constants and eliminate all branches + // hopefully the compiler will propagate the constants and eliminate all branches uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff); #else uchar4 feature_mask = feature_masks[group_feature]; diff --git a/src/treelearner/ocl/histogram64.cl b/src/treelearner/ocl/histogram64.cl index 48fa8c506d8b..4ec4d6371df5 100644 --- a/src/treelearner/ocl/histogram64.cl +++ b/src/treelearner/ocl/histogram64.cl @@ -454,7 +454,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/parallel_tree_learner.h b/src/treelearner/parallel_tree_learner.h index b942dceab28b..aff8ac0fd4c5 100644 --- a/src/treelearner/parallel_tree_learner.h +++ b/src/treelearner/parallel_tree_learner.h @@ -148,12 +148,12 @@ class VotingParallelTreeLearner: public TREELEARNER_T { * \brief Perform global voting * \param leaf_idx index of leaf * \param splits All splits from local voting - * \param out Result of gobal voting, only store feature indices + * \param out Result of global voting, only store feature indices */ void GlobalVoting(int leaf_idx, const std::vector& splits, std::vector* out); /*! - * \brief Copy local histgram to buffer + * \brief Copy local histogram to buffer * \param smaller_top_features Selected features for smaller leaf * \param larger_top_features Selected features for larger leaf */ @@ -183,9 +183,9 @@ class VotingParallelTreeLearner: public TREELEARNER_T { std::vector block_start_; /*! \brief Block size for reduce scatter */ std::vector block_len_; - /*! \brief Read positions for feature histgrams at smaller leaf */ + /*! \brief Read positions for feature histograms at smaller leaf */ std::vector smaller_buffer_read_start_pos_; - /*! \brief Read positions for feature histgrams at larger leaf */ + /*! \brief Read positions for feature histograms at larger leaf */ std::vector larger_buffer_read_start_pos_; /*! 
\brief Size for reduce scatter */ comm_size_t reduce_scatter_size_; diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 14ede072dc9e..01cdd7623c02 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -735,24 +735,24 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf, std::set SerialTreeLearner::FindAllForceFeatures(Json force_split_leaf_setting) { std::set force_features; - std::queue force_split_leafs; + std::queue force_split_leaves; - force_split_leafs.push(force_split_leaf_setting); + force_split_leaves.push(force_split_leaf_setting); - while (!force_split_leafs.empty()) { - Json split_leaf = force_split_leafs.front(); - force_split_leafs.pop(); + while (!force_split_leaves.empty()) { + Json split_leaf = force_split_leaves.front(); + force_split_leaves.pop(); const int feature_index = split_leaf["feature"].int_value(); const int feature_inner_index = train_data_->InnerFeatureIndex(feature_index); force_features.insert(feature_inner_index); if (split_leaf.object_items().count("left") > 0) { - force_split_leafs.push(split_leaf["left"]); + force_split_leaves.push(split_leaf["left"]); } if (split_leaf.object_items().count("right") > 0) { - force_split_leafs.push(split_leaf["right"]); + force_split_leaves.push(split_leaf["right"]); } } diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index b88db5a7ba28..37f2d4cf2641 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -207,9 +207,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec smaller_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++smaller_idx; } if (cur_used_features >= cur_total_feature) { @@ -225,9 +225,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec larger_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += 
this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++larger_idx; } } diff --git a/tests/cpp_tests/test_chunked_array.cpp b/tests/cpp_tests/test_chunked_array.cpp index 9bfd857299ab..bc58918082a8 100644 --- a/tests/cpp_tests/test_chunked_array.cpp +++ b/tests/cpp_tests/test_chunked_array.cpp @@ -217,8 +217,8 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { // Number of trials for each new ChunkedArray configuration. Pass 100 times over the search space: const size_t N_TRIALS = MAX_CHUNKS_SEARCH * MAX_IN_CHUNK_SEARCH_IDX * 100; const int INVALID = -1; // A negative value signaling the requested value lives in an invalid address. - const int UNITIALIZED = -99; // A negative value to signal this was never updated. - std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); // Memorize latest inserted values. + const int UNINITIALIZED = -99; // A negative value to signal this was never updated. + std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); // Memorize latest inserted values. // Each outer loop iteration changes the test by adding +1 chunk. We start with 1 chunk only: for (size_t chunks = 1; chunks < MAX_CHUNKS_SEARCH; ++chunks) { @@ -249,10 +249,10 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { } // Final check: ensure even with overrides, all valid insertions store the latest value at that address: - std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); + std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); ca_.coalesce_to(coalesced_out.data(), true); // Export all valid addresses. for (size_t i = 0; i < ref_values.size(); ++i) { - if (ref_values[i] != UNITIALIZED) { + if (ref_values[i] != UNINITIALIZED) { // Test in 2 ways that the values are correctly laid out in memory: EXPECT_EQ(ca_.getitem(i / CHUNK_SIZE, i % CHUNK_SIZE, INVALID), ref_values[i]); EXPECT_EQ(coalesced_out[i], ref_values[i]); diff --git a/tests/cpp_tests/test_stream.cpp b/tests/cpp_tests/test_stream.cpp index bc5f73b0a3ee..a656af1e2fe9 100644 --- a/tests/cpp_tests/test_stream.cpp +++ b/tests/cpp_tests/test_stream.cpp @@ -17,7 +17,7 @@ using LightGBM::TestUtils; void test_stream_dense( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -86,7 +86,7 @@ void test_stream_dense( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows dense data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -131,7 +131,7 @@ void test_stream_dense( void test_stream_sparse( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -203,7 +203,7 @@ void test_stream_sparse( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows sparse data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -249,13 +249,13 @@ void test_stream_sparse( TEST(Stream, 
PushDenseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -266,9 +266,9 @@ TEST(Stream, PushDenseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -290,23 +290,23 @@ TEST(Stream, PushDenseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_dense(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); + test_stream_dense(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } TEST(Stream, PushSparseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -317,9 +317,9 @@ TEST(Stream, PushSparseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", 
unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -344,10 +344,10 @@ TEST(Stream, PushSparseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_sparse(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); + test_stream_sparse(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 2eeba46f2869..b5e17991f63d 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -471,7 +471,7 @@ def test_classifier_custom_objective(output, task, cluster): assert_eq(p1_proba, p1_proba_local) -def test_machines_to_worker_map_unparseable_host_names(): +def test_machines_to_worker_map_unparsable_host_names(): workers = {"0.0.0.1:80": {}, "0.0.0.2:80": {}} machines = "0.0.0.1:80,0.0.0.2:80" with pytest.raises(ValueError, match="Could not parse host name from worker address '0.0.0.1:80'"): diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 9ae471e7f4b9..cb2e893c9612 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -660,7 +660,7 @@ def test_ranking_prediction_early_stopping(): # Simulates position bias for a given ranking dataset. -# The ouput dataset is identical to the input one with the exception for the relevance labels. +# The output dataset is identical to the input one with the exception for the relevance labels. # The new labels are generated according to an instance of a cascade user model: # for each query, the user is simulated to be traversing the list of documents ranked by a baseline ranker # (in our example it is simply the ordering by some feature correlated with relevance, e.g., 34) From ea04c66c86e31ebf68ec151d75c14fbdfb6ea681 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Tue, 3 Dec 2024 15:08:10 +0300 Subject: [PATCH 15/27] [docs] update installation guide (#6696) * Update Installation-Guide.rst * Update script.js * replace all Ninja sections with one paragraph --------- Co-authored-by: shiyu1994 --- docs/Installation-Guide.rst | 621 ++++++++++++++++++++++-------------- docs/_static/js/script.js | 4 +- 2 files changed, 389 insertions(+), 236 deletions(-) diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 41b84f9b82c2..1e28d037388d 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -1,17 +1,30 @@ Installation Guide ================== -This is a guide for building the LightGBM Command Line Interface (CLI). 
If you want to build the Python-package or R-package please refer to `Python-package`_ and `R-package`_ folders respectively.
-
 All instructions below are aimed at compiling the 64-bit version of LightGBM.
 It is worth compiling the 32-bit version only in very rare special cases involving environmental limitations.
 The 32-bit version is slow and untested, so use it at your own risk and don't forget to adjust some of the commands below when installing.

+By default, instructions below will use **VS Build Tools** or **make** tool to compile the code.
+It is possible to use the `Ninja`_ tool instead of make on all platforms, but VS Build Tools cannot be replaced with Ninja.
+You can add ``-G Ninja`` to CMake flags to use Ninja.
+
+By default, instructions below will produce a shared library file and an executable file with a command-line interface.
+You can add ``-DBUILD_CLI=OFF`` to CMake flags to disable the executable compilation.
+
 If you need to build a static library instead of a shared one, you can add ``-DBUILD_STATIC_LIB=ON`` to CMake flags.

+By default, instructions below will place header files into a system-wide folder.
+You can add ``-DINSTALL_HEADERS=OFF`` to CMake flags to disable headers installation.
+
+By default, on macOS, CMake looks into standard Homebrew folders to find dependencies (e.g. OpenMP).
+You can add ``-DUSE_HOMEBREW_FALLBACK=OFF`` to CMake flags to disable this behaviour.
+
 Users who want to perform benchmarking can make LightGBM output time costs for different internal routines by adding ``-DUSE_TIMETAG=ON`` to CMake flags.

-It is possible to build LightGBM in debug mode. In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM.
+It is possible to build LightGBM in debug mode.
+In this mode all compiler optimizations are disabled and LightGBM performs more checks internally.
+To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM.

 .. _sanitizers:

@@ -30,7 +43,7 @@ It is very useful to build `C++ unit tests <#build-c-unit-tests>`__ with sanitiz

 .. _nightly-builds:

-You can also download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|.
+You can download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|.

 .. contents:: **Contents**
     :depth: 1

@@ -40,12 +53,10 @@ You can also download the artifacts of the latest successful build on master bra
 Windows
 ~~~~~~~

-On Windows LightGBM can be built using
+On Windows, LightGBM can be built using

 - **Visual Studio**;
-
 - **CMake** and **VS Build Tools**;
-
 - **CMake** and **MinGW**.

 Visual Studio (or VS Build Tools)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 With GUI
 ********

-1. Install `Visual Studio`_ (2015 or newer).
+1. Install `Visual Studio`_.

 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it.

-3. Go to ``LightGBM-master/windows`` folder.
+3. Go to ``LightGBM-complete_source_code_zip/windows`` folder.

4. 
Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ***************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -98,7 +110,7 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended that you use **Visual Studio** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -106,9 +118,17 @@ It is recommended that you use **Visual Studio** since it has better multithread Linux ~~~~~ -On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. +On Linux, LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -1. Install `CMake`_. +gcc +^^^ + +1. Install `CMake`_ and **gcc**. 2. Run the following commands: @@ -119,53 +139,69 @@ On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. cmake -B build -S . cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +^^^^^ -Using ``Ninja`` -^^^^^^^^^^^^^^^ +1. Install `CMake`_, **Clang** and **OpenMP**. -On Linux, LightGBM can also be built with `Ninja `__ instead of ``make``. +2. Run the following commands: -.. code:: sh + .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -G 'Ninja' - cmake --build build -j2 + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . 
+ cmake --build build -j4 macOS ~~~~~ -On macOS LightGBM can be installed using **Homebrew**, or can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, LightGBM can be installed using -Apple Clang -^^^^^^^^^^^ +- **Homebrew**; +- **MacPorts**; + +or can be built using -Only **Apple Clang** version 8.1 or higher is supported. +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. Install Using ``Homebrew`` -************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: sh brew install lightgbm -Build from GitHub -***************** +Refer to https://formulae.brew.sh/formula/lightgbm for more details. -1. Install `CMake`_ : +Install Using ``MacPorts`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ - .. code:: sh +.. code:: sh - brew install cmake + sudo port install LightGBM + +Refer to https://ports.macports.org/port/LightGBM for more details. + +**Note**: Port for LightGBM is not maintained by LightGBM's maintainers. -2. Install **OpenMP**: +Build from GitHub +^^^^^^^^^^^^^^^^^ + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. + +Apple Clang +*********** + +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake libomp -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -175,21 +211,15 @@ Build from GitHub cmake --build build -j4 gcc -^^^ - -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake +*** -2. Install **gcc**: +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake gcc -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -213,12 +243,10 @@ You can build LightGBM without OpenMP support but it is **strongly not recommend Windows ^^^^^^^ -On Windows a version of LightGBM without OpenMP support can be built using +On Windows, a version of LightGBM without OpenMP support can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) @@ -227,26 +255,27 @@ Visual Studio (or VS Build Tools) With GUI -------- -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. Open ``LightGBM.sln`` file with **Visual Studio**. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library. -5. Go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. +5. Go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. -6. Get back to the project's main screen, then choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +6. Get back to the project's main screen and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. 
+ If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ----------------- -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -275,14 +304,36 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. Linux ^^^^^ -On Linux a version of LightGBM without OpenMP support can be built using **CMake** and **gcc** or **Clang**. +On Linux, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_ and **gcc**. + +2. Run the following commands: + + .. code:: sh -1. Install `CMake`_. + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_OPENMP=OFF + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_ and **Clang**. 2. Run the following commands: @@ -290,20 +341,24 @@ On Linux a version of LightGBM without OpenMP support can be built using **CMake git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine cmake -B build -S . -DUSE_OPENMP=OFF cmake --build build -j4 macOS ^^^^^ -On macOS a version of LightGBM without OpenMP support can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_: .. code:: sh @@ -321,19 +376,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install cmake + brew install cmake gcc -2. Install **gcc**: - - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -354,35 +403,36 @@ If you need to run a distributed learning application with high performance comm Windows ^^^^^^^ -On Windows an MPI version of LightGBM can be built using +On Windows, an MPI version of LightGBM can be built using - **MS MPI** and **Visual Studio**; - - **MS MPI**, **CMake** and **VS Build Tools**. 
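For context on what this MPI build is used for: the MPI-enabled CLI is started through an MPI runner for distributed learning rather than invoked directly. A minimal sketch of such a launch, assuming an ``./lightgbm`` executable built with ``-DUSE_MPI=ON`` and a hypothetical ``train.conf`` config file (not shipped with LightGBM) that sets ``tree_learner`` to one of the parallel learners:

.. code:: sh

    # launch 2 MPI processes of an MPI-enabled CLI built with -DUSE_MPI=ON;
    # train.conf is an assumed LightGBM config file that sets a parallel tree_learner
    mpiexec -n 2 ./lightgbm config=train.conf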
+**Note**: Building the MPI version with **MinGW** is not supported because MinGW lacks an MPI library.
+
 With GUI
 ********

 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed.

-2. Install `Visual Studio`_ (2015 or newer).
+2. Install `Visual Studio`_.

 3. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it.

-4. Go to ``LightGBM-master/windows`` folder.
+4. Go to ``LightGBM-complete_source_code_zip/windows`` folder.

-5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``.
+5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``.

-   If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine.
+   If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine.

-The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release_mpi`` folder.
+The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release_mpi`` folder.

 From Command Line
 *****************

 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed.

-2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed).
+2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed).

 3. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      cmake -B build -S . -A x64 -DUSE_MPI=ON
      cmake --build build --target ALL_BUILD --config Release

 The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.

-**Note**: Building MPI version by **MinGW** is not supported due to the miss of MPI library in it.

 Linux
 ^^^^^

-On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **gcc** or **Clang**.
+On Linux, an MPI version of LightGBM can be built using

-1. Install `Open MPI`_.
+- **CMake**, **gcc** and **Open MPI**;
+- **CMake**, **Clang** and **Open MPI**.

-2. Install `CMake`_.
+After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder.

-3. Run the following commands:
+gcc
+***
+
+1. Install `CMake`_, **gcc** and `Open MPI`_.
+
+2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      cmake -B build -S . -DUSE_MPI=ON
      cmake --build build -j4

-**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
-
-macOS
-^^^^^
+Clang
+*****

-On macOS an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **Apple Clang** or **gcc**.
+1. Install `CMake`_, **Clang**, **OpenMP** and `Open MPI`_.

-Apple Clang
-***********
+2. Run the following commands:

-Only **Apple Clang** version 8.1 or higher is supported.
+  .. code:: sh

-1. Install `CMake`_ :
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . -DUSE_MPI=ON
+    cmake --build build -j4

-   .. 
code:: sh +macOS +^^^^^ - brew install cmake +On macOS, an MPI version of LightGBM can be built using -2. Install **OpenMP**: +- **CMake**, **Open MPI** and **Apple Clang**; +- **CMake**, **Open MPI** and **gcc**. - .. code:: sh +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. - brew install libomp +Apple Clang +*********** -3. Install **Open MPI**: +1. Install `CMake`_, **OpenMP** and `Open MPI`_: .. code:: sh - brew install open-mpi + brew install cmake libomp open-mpi -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -457,25 +515,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake - -2. Install **gcc**: +1. Install `CMake`_, `Open MPI`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake open-mpi gcc -3. Install **Open MPI**: - - .. code:: sh - - brew install open-mpi - -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -488,48 +534,19 @@ gcc Build GPU Version ~~~~~~~~~~~~~~~~~ -Linux -^^^^^ - -On Linux a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **gcc** or **Clang**. - -The following dependencies should be installed before compilation: - -- **OpenCL** 1.2 headers and libraries, which is usually provided by GPU manufacture. - - The generic OpenCL ICD packages (for example, Debian package ``ocl-icd-libopencl1`` and ``ocl-icd-opencl-dev``) can also be used. - -- **libboost** 1.56 or later (1.61 or later is recommended). - - We use Boost.Compute as the interface to GPU, which is part of the Boost library since version 1.61. However, since we include the source code of Boost.Compute as a submodule, we only require the host has Boost 1.56 or later installed. We also use Boost.Align for memory allocation. Boost.Compute requires Boost.System and Boost.Filesystem to store offline kernel cache. - - The following Debian packages should provide necessary Boost libraries: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``. - -- **CMake** - -To build LightGBM GPU version, run the following commands: - -.. code:: sh - - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_GPU=1 - # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ - cmake --build build - -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - Windows ^^^^^^^ -On Windows a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **OpenCL**, **Boost**, **CMake** and **VS Build Tools**; +- **OpenCL**, **Boost**, **CMake** and **MinGW**. If you use **MinGW**, the build procedure is similar to the build on Linux. Following procedure is for the **MSVC** (Microsoft Visual C++) build. -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is installed). +1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is installed).

 2. Install **OpenCL** for Windows. The installation depends on the brand (NVIDIA, AMD, Intel) of your GPU card.

@@ -559,13 +576,68 @@ Following procedure is for the **MSVC** (Microsoft Visual C++) build.

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
+     cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
      # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
-     # cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include"
+     # cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include"
      cmake --build build --target ALL_BUILD --config Release

 **Note**: ``C:/local/boost_1_63_0`` and ``C:/local/boost_1_63_0/lib64-msvc-14.0`` are locations of your **Boost** binaries (assuming you've downloaded 1.63.0 version for Visual Studio 2015).

+The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.
+
+Linux
+^^^^^
+
+On Linux, a GPU version of LightGBM (``device_type=gpu``) can be built using
+
+- **CMake**, **OpenCL**, **Boost** and **gcc**;
+- **CMake**, **OpenCL**, **Boost** and **Clang**.
+
+**OpenCL** headers and libraries are usually provided by the GPU manufacturer.
+The generic OpenCL ICD packages (for example, Debian packages ``ocl-icd-libopencl1``, ``ocl-icd-opencl-dev``, ``pocl-opencl-icd``) can also be used.
+
+Required **Boost** libraries (Boost.Align, Boost.System, Boost.Filesystem, Boost.Chrono) should be provided by the following Debian packages: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``, ``libboost-chrono-dev``.
+
+After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder.
+
+gcc
+***
+
+1. Install `CMake`_, **gcc**, **OpenCL** and **Boost**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    cmake -B build -S . -DUSE_GPU=ON
+    # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
+    # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/
+    cmake --build build -j4
+
+Clang
+*****
+
+1. Install `CMake`_, **Clang**, **OpenMP**, **OpenCL** and **Boost**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . 
-DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +macOS +^^^^^ + +The GPU version is not supported on macOS. + Docker ^^^^^^ @@ -574,60 +646,84 @@ Refer to `GPU Docker folder `__ of LightGBM (``device_type=gpu``) is based on OpenCL. +The `original GPU version <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL. -The CUDA-based build (``device_type=cuda``) is a separate implementation. +The CUDA-based version (``device_type=cuda``) is a separate implementation. Use this version in Linux environments with an NVIDIA GPU with compute capability 6.0 or higher. +Windows +^^^^^^^ + +The CUDA version is not supported on Windows. +Use the `GPU version <#build-gpu-version>`__ (``device_type=gpu``) for GPU acceleration on Windows. + Linux ^^^^^ -On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and **gcc** or **Clang**. +On Linux, a CUDA version of LightGBM can be built using -The following dependencies should be installed before compilation: +- **CMake**, **gcc** and **CUDA**; +- **CMake**, **Clang** and **CUDA**. -- **CUDA** 11.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers. +Please refer to `this detailed guide`_ for **CUDA** libraries installation. -- **CMake** +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -To build LightGBM CUDA version, run the following commands: +gcc +*** -.. code:: sh +1. Install `CMake`_, **gcc** and **CUDA**. + +2. Run the following commands: - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_CUDA=1 - cmake --build build -j4 + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP** and **CUDA**. + +2. Run the following commands: -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 macOS ^^^^^ The CUDA version is not supported on macOS. -Windows -^^^^^^^ - -The CUDA version is not supported on Windows. -Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows. - Build Java Wrapper ~~~~~~~~~~~~~~~~~~ Using the following instructions you can generate a JAR file containing the LightGBM `C API <./Development-Guide.rst#c-api>`__ wrapped by **SWIG**. +After compilation the ``.jar`` file will be in ``LightGBM/build`` folder. + Windows ^^^^^^^ -On Windows a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a Java wrapper of LightGBM can be built using + +- **Java**, **SWIG**, **CMake** and **VS Build Tools**; +- **Java**, **SWIG**, **CMake** and **MinGW**. VS Build Tools ************** -1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -638,14 +734,12 @@ VS Build Tools cmake -B build -S . -A x64 -DUSE_SWIG=ON cmake --build build --target ALL_BUILD --config Release -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/Release`` folder. - MinGW-w64 ********* 1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -656,9 +750,7 @@ MinGW-w64 cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON cmake --build build -j4 -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/`` folder. - -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended to use **VS Build Tools (Visual Studio)** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -666,9 +758,15 @@ It is recommended to use **VS Build Tools (Visual Studio)** since it has better Linux ^^^^^ -On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **gcc** or **Clang**. +On Linux, a Java wrapper of LightGBM can be built using + +- **CMake**, **gcc**, **Java** and **SWIG**; +- **CMake**, **Clang**, **Java** and **SWIG**. + +gcc +*** -1. Install `CMake`_, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +1. Install `CMake`_, **gcc**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 2. Run the following commands: @@ -679,34 +777,40 @@ On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMa cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +***** -macOS -^^^^^ +1. Install `CMake`_, **Clang**, **OpenMP**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). -On macOS a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **Apple Clang** or **gcc**. +2. Run the following commands: -First, install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). -Then, either follow the **Apple Clang** or **gcc** installation instructions below. + .. 
code:: sh -Apple Clang -*********** + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_SWIG=ON + cmake --build build -j4 -Only **Apple Clang** version 8.1 or higher is supported. +macOS +^^^^^ -1. Install `CMake`_ : +On macOS, a Java wrapper of LightGBM can be built using - .. code:: sh +- **CMake**, **Java**, **SWIG** and **Apple Clang**; +- **CMake**, **Java**, **SWIG** and **gcc**. - brew install cmake +Apple Clang +*********** -2. Install **OpenMP**: +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake openjdk swig libomp + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -718,19 +822,14 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: - - .. code:: sh - - brew install gcc + brew install cmake openjdk swig gcc + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -740,15 +839,31 @@ gcc cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 +Build Python-package +~~~~~~~~~~~~~~~~~~~~ + +Refer to `Python-package folder `__. + +Build R-package +~~~~~~~~~~~~~~~ + +Refer to `R-package folder `__. + Build C++ Unit Tests ~~~~~~~~~~~~~~~~~~~~ Windows ^^^^^^^ -On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Build Tools**. +On Windows, C++ unit tests of LightGBM can be built using + +- **CMake** and **VS Build Tools**; +- **CMake** and **MinGW**. + +VS Build Tools +************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -756,17 +871,43 @@ On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Bui git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm --config Debug The ``.exe`` file will be in ``LightGBM/Debug`` folder. +MinGW-w64 +********* + +1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. + +2. Run the following commands: + + .. code:: console + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +The ``.exe`` file will be in ``LightGBM/`` folder. + +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. + Linux ^^^^^ -On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** or **Clang**. 
+On Linux, C++ unit tests of LightGBM can be built using
+
+- **CMake** and **gcc**;
+- **CMake** and **Clang**.
+
+After compilation the executable file will be in ``LightGBM/`` folder.
+
+gcc
+***

-1. Install `CMake`_.
+1. Install `CMake`_ and **gcc**.

 2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
+     cmake --build build --target testlightgbm -j4
+
+Clang
+*****
+
+1. Install `CMake`_, **Clang** and **OpenMP**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 macOS
 ^^^^^

-On macOS a C++ unit tests of LightGBM can be built using **CMake** and **Apple Clang** or **gcc**.
+On macOS, C++ unit tests of LightGBM can be built using
+
+- **CMake** and **Apple Clang**;
+- **CMake** and **gcc**.
+
+After compilation the executable file will be in ``LightGBM/`` folder.

 Apple Clang
 ***********

-Only **Apple Clang** version 8.1 or higher is supported.
-
-1. Install `CMake`_ :
+1. Install `CMake`_ and **OpenMP**:

    .. code:: sh

-     brew install cmake
+     brew install cmake libomp

 2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 gcc
 ***

-1. Install `CMake`_ :
+1. Install `CMake`_ and **gcc**:

    .. code:: sh

-     brew install cmake
-
-2. Install **gcc**:
-
-   .. code:: sh
-
-     brew install gcc
-
-3. Run the following commands:
+2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 .. |download artifacts| image:: ./_static/images/artifacts-not-available.svg
    :target: https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html

-.. _Python-package: https://github.com/microsoft/LightGBM/tree/master/python-package
-
-.. _R-package: https://github.com/microsoft/LightGBM/tree/master/R-package
-
 .. _Visual Studio: https://visualstudio.microsoft.com/downloads/

 .. _Git for Windows: https://git-scm.com/download/win

 .. _this detailed guide: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html

 .. _following docs: https://github.com/google/sanitizers/wiki
+
+.. 
_Ninja: https://ninja-build.org diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index c4717b8a0ee5..c6d21713fe5c 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -15,7 +15,7 @@ $(() => { /* Collapse specified sections in the installation guide */ if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { $( - '', + '', ).appendTo("body"); const collapsible = [ "#build-threadless-version-not-recommended", @@ -23,6 +23,8 @@ $(() => { "#build-gpu-version", "#build-cuda-version", "#build-java-wrapper", + "#build-python-package", + "#build-r-package", "#build-c-unit-tests", ]; $.each(collapsible, (_, val) => { From 6e0b0a8be44b14ade10737288a26aa361a00a18e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 3 Dec 2024 20:05:31 -0600 Subject: [PATCH 16/27] [python-package] simplify scikit-learn 1.6+ tags support (#6735) --- python-package/lightgbm/compat.py | 10 ---------- python-package/lightgbm/sklearn.py | 15 +++++---------- tests/python_package_test/test_sklearn.py | 6 ++++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 0b9444b0ecbf..96dee6522572 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -14,14 +14,6 @@ from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import assert_all_finite, check_array, check_X_y - # sklearn.utils Tags types can be imported unconditionally once - # lightgbm's minimum scikit-learn version is 1.6 or higher - try: - from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags - from sklearn.utils import RegressorTags as _sklearn_RegressorTags - except ImportError: - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None try: from sklearn.exceptions import NotFittedError from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold @@ -148,8 +140,6 @@ class _LGBMRegressorBase: # type: ignore _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None _LGBMValidateData = None - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None _sklearn_version = None # additional scikit-learn imports only for type hints diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index d730b66c3556..108ef1e14498 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -40,8 +40,6 @@ _LGBMModelBase, _LGBMRegressorBase, _LGBMValidateData, - _sklearn_ClassifierTags, - _sklearn_RegressorTags, _sklearn_version, dt_DataTable, pd_DataFrame, @@ -726,7 +724,7 @@ def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: # take whatever tags are provided by BaseEstimator, then modify # them with LightGBM-specific values return self._update_sklearn_tags_from_dict( - tags=_LGBMModelBase.__sklearn_tags__(self), + tags=super().__sklearn_tags__(), tags_dict=self._more_tags(), ) @@ -1298,10 +1296,7 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "regressor" - tags.regressor_tags = _sklearn_RegressorTags(multi_label=False) - return tags + return super().__sklearn_tags__() def fit( # type: ignore[override] self, @@ -1360,9 +1355,9 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "classifier" - 
tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False) + tags = super().__sklearn_tags__() + tags.classifier_tags.multi_class = True + tags.classifier_tags.multi_label = False return tags def fit( # type: ignore[override] diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index d187e9df5a9f..1cdd047f1857 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -1488,6 +1488,12 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True + if estimator_class is lgb.LGBMClassifier: + assert sklearn_tags.estimator_type == "classifier" + assert sklearn_tags.classifier_tags.multi_class is True + assert sklearn_tags.classifier_tags.multi_label is False + elif estimator_class is lgb.LGBMRegressor: + assert sklearn_tags.estimator_type == "regressor" @pytest.mark.parametrize("task", all_tasks) From d4d6c87db02a146ac6dc04b00f538e02a3b22250 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 5 Dec 2024 10:24:35 -0600 Subject: [PATCH 17/27] [c++] include wherever uint8_t is used (#6736) --- include/LightGBM/bin.h | 1 + include/LightGBM/cuda/cuda_column_data.hpp | 1 + include/LightGBM/cuda/cuda_row_data.hpp | 1 + include/LightGBM/dataset.h | 1 + include/LightGBM/feature_group.h | 1 + include/LightGBM/train_share_states.h | 1 + include/LightGBM/tree.h | 1 + src/c_api.cpp | 1 + src/io/cuda/cuda_column_data.cpp | 2 ++ src/io/json11.cpp | 1 + 10 files changed, 11 insertions(+) diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index a33fcfa9c45c..5826f2387102 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/cuda/cuda_column_data.hpp b/include/LightGBM/cuda/cuda_column_data.hpp index 314a178859c6..8875cd151d7d 100644 --- a/include/LightGBM/cuda/cuda_column_data.hpp +++ b/include/LightGBM/cuda/cuda_column_data.hpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace LightGBM { diff --git a/include/LightGBM/cuda/cuda_row_data.hpp b/include/LightGBM/cuda/cuda_row_data.hpp index 1d4cb2f73b1e..85da72bc083d 100644 --- a/include/LightGBM/cuda/cuda_row_data.hpp +++ b/include/LightGBM/cuda/cuda_row_data.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #define COPY_SUBROW_BLOCK_SIZE_ROW_DATA (1024) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index ef214b7cd89d..c2a4b62296f2 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index f13a5fff966f..bcc0388ba507 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/train_share_states.h b/include/LightGBM/train_share_states.h index f102668edf70..e4f4e4afea5f 100644 --- a/include/LightGBM/train_share_states.h +++ b/include/LightGBM/train_share_states.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index c28ddd140c48..bc5af621e402 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -8,6 +8,7 @@ #include #include 
+#include #include #include #include diff --git a/src/c_api.cpp b/src/c_api.cpp index 98748bc9ff2f..cf6577ad5e2c 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/src/io/cuda/cuda_column_data.cpp b/src/io/cuda/cuda_column_data.cpp index eb0938c01225..415578847f07 100644 --- a/src/io/cuda/cuda_column_data.cpp +++ b/src/io/cuda/cuda_column_data.cpp @@ -7,6 +7,8 @@ #include +#include + namespace LightGBM { CUDAColumnData::CUDAColumnData(const data_size_t num_data, const int gpu_device_id) { diff --git a/src/io/json11.cpp b/src/io/json11.cpp index 32a9c9d718b7..acd09f9ecb12 100644 --- a/src/io/json11.cpp +++ b/src/io/json11.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include From 33764e131e3556a4fb5ee11901e91a03ad0c37b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Mon, 9 Dec 2024 16:06:19 -0600 Subject: [PATCH 18/27] [ci] set upper bound on dask (#6742) --- .ci/conda-envs/ci-core.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/conda-envs/ci-core.txt b/.ci/conda-envs/ci-core.txt index a0763580c7f3..46d20963ed98 100644 --- a/.ci/conda-envs/ci-core.txt +++ b/.ci/conda-envs/ci-core.txt @@ -18,7 +18,7 @@ # direct imports cffi>=1.16 -dask>=2023.5.0 +dask>=2023.5.0,<2024.12 joblib>=1.3.2 matplotlib-base>=3.7.3 numpy>=1.24.4 From ae76aad6a591ddd41723c12a3f236643bb0ba2c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Tue, 10 Dec 2024 03:11:52 -0600 Subject: [PATCH 19/27] [python-package] do not copy column-major numpy arrays when creating Dataset (#6721) * do not copy column-major numpy arrays when creating Dataset * fix logic * lint * code review * update test * move dataset test to basic * increase features * assert single layout --------- Co-authored-by: Nikita Titov --- python-package/lightgbm/basic.py | 28 ++++++++++++++----- tests/python_package_test/test_basic.py | 36 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 99a690f38993..1db55385af1b 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -188,6 +188,23 @@ def _get_sample_count(total_nrow: int, params: str) -> int: return sample_cnt.value +def _np2d_to_np1d(mat: np.ndarray) -> Tuple[np.ndarray, int]: + if mat.dtype in (np.float32, np.float64): + dtype = mat.dtype + else: + dtype = np.float32 + if mat.flags["F_CONTIGUOUS"]: + order = "F" + layout = _C_API_IS_COL_MAJOR + else: + order = "C" + layout = _C_API_IS_ROW_MAJOR + # ensure dtype and order, copies if either do not match + data = np.asarray(mat, dtype=dtype, order=order) + # flatten array without copying + return data.ravel(order=order), layout + + class _MissingType(Enum): NONE = "None" NAN = "NaN" @@ -684,7 +701,8 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va _C_API_DTYPE_INT32 = 2 _C_API_DTYPE_INT64 = 3 -"""Matrix is row major in Python""" +"""Macro definition of data order in matrix""" +_C_API_IS_COL_MAJOR = 0 _C_API_IS_ROW_MAJOR = 1 """Macro definition of prediction type in C API of LightGBM""" @@ -2297,11 +2315,7 @@ def __init_from_np2d( raise ValueError("Input numpy.ndarray must be 2 dimensional") self._handle = ctypes.c_void_p() - if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) - else: # change non-float data to float data, need to copy - data = 
From ae76aad6a591ddd41723c12a3f236643bb0ba2c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Tue, 10 Dec 2024 03:11:52 -0600
Subject: [PATCH 19/27] [python-package] do not copy column-major numpy arrays
 when creating Dataset (#6721)

* do not copy column-major numpy arrays when creating Dataset

* fix logic

* lint

* code review

* update test

* move dataset test to basic

* increase features

* assert single layout

---------

Co-authored-by: Nikita Titov
---
 python-package/lightgbm/basic.py        | 28 ++++++++++++++-----
 tests/python_package_test/test_basic.py | 36 +++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 99a690f38993..1db55385af1b 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -188,6 +188,23 @@ def _get_sample_count(total_nrow: int, params: str) -> int:
     return sample_cnt.value


+def _np2d_to_np1d(mat: np.ndarray) -> Tuple[np.ndarray, int]:
+    if mat.dtype in (np.float32, np.float64):
+        dtype = mat.dtype
+    else:
+        dtype = np.float32
+    if mat.flags["F_CONTIGUOUS"]:
+        order = "F"
+        layout = _C_API_IS_COL_MAJOR
+    else:
+        order = "C"
+        layout = _C_API_IS_ROW_MAJOR
+    # ensure dtype and order, copies if either do not match
+    data = np.asarray(mat, dtype=dtype, order=order)
+    # flatten array without copying
+    return data.ravel(order=order), layout
+
+
 class _MissingType(Enum):
     NONE = "None"
     NAN = "NaN"
@@ -684,7 +701,8 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
 _C_API_DTYPE_INT32 = 2
 _C_API_DTYPE_INT64 = 3

-"""Matrix is row major in Python"""
+"""Macro definition of data order in matrix"""
+_C_API_IS_COL_MAJOR = 0
 _C_API_IS_ROW_MAJOR = 1

 """Macro definition of prediction type in C API of LightGBM"""
@@ -2297,11 +2315,7 @@ def __init_from_np2d(
             raise ValueError("Input numpy.ndarray must be 2 dimensional")

         self._handle = ctypes.c_void_p()
-        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
-        else:  # change non-float data to float data, need to copy
-            data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
-
+        data, layout = _np2d_to_np1d(mat)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         _safe_call(
             _LIB.LGBM_DatasetCreateFromMat(
@@ -2309,7 +2323,7 @@ def __init_from_np2d(
                 ctypes.c_int(type_ptr_data),
                 ctypes.c_int32(mat.shape[0]),
                 ctypes.c_int32(mat.shape[1]),
-                ctypes.c_int(_C_API_IS_ROW_MAJOR),
+                ctypes.c_int(layout),
                 _c_str(params_str),
                 ref_dataset,
                 ctypes.byref(self._handle),
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 0dfe3e47fa11..bdd4d3f58b80 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -947,3 +947,39 @@ def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(c
         "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
     )
     assert expected_warning in capsys.readouterr().out
+
+
+@pytest.mark.parametrize("order", ["C", "F"])
+@pytest.mark.parametrize("dtype", ["float32", "int64"])
+def test_no_copy_in_dataset_from_numpy_2d(rng, order, dtype):
+    X = rng.random(size=(100, 3))
+    X = np.require(X, dtype=dtype, requirements=order)
+    X1d, layout = lgb.basic._np2d_to_np1d(X)
+    if order == "F":
+        assert layout == lgb.basic._C_API_IS_COL_MAJOR
+    else:
+        assert layout == lgb.basic._C_API_IS_ROW_MAJOR
+    if dtype == "float32":
+        assert np.shares_memory(X, X1d)
+    else:
+        # makes a copy
+        assert not np.shares_memory(X, X1d)
+
+
+def test_equal_datasets_from_row_major_and_col_major_data(tmp_path):
+    # row-major dataset
+    X_row, y = make_blobs(n_samples=1_000, n_features=3, centers=2)
+    assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"]
+    ds_row = lgb.Dataset(X_row, y)
+    ds_row_path = tmp_path / "ds_row.txt"
+    ds_row._dump_text(ds_row_path)
+
+    # col-major dataset
+    X_col = np.asfortranarray(X_row)
+    assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"]
+    ds_col = lgb.Dataset(X_col, y)
+    ds_col_path = tmp_path / "ds_col.txt"
+    ds_col._dump_text(ds_col_path)
+
+    # check datasets are equal
+    assert filecmp.cmp(ds_row_path, ds_col_path)
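The `_np2d_to_np1d` helper in the patch above works because, when an array's memory layout already matches the requested order, both `np.asarray` and `ravel` return views rather than copies. A quick standalone illustration of that property, using plain NumPy and independent of LightGBM:

import numpy as np

rng = np.random.default_rng(0)
# force a column-major (Fortran-ordered) matrix
mat = np.asfortranarray(rng.random((100, 3)))

# matching order and dtype: asarray and ravel are no-ops memory-wise
flat = np.asarray(mat, dtype=np.float64, order="F").ravel(order="F")
assert np.shares_memory(mat, flat)

# mismatched order (or dtype) forces a copy
flat_c = np.asarray(mat, dtype=np.float64, order="C").ravel(order="C")
assert not np.shares_memory(mat, flat_c)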
From 186c7cd47a72c080ccfccdf799b0fbe8da2ff53a Mon Sep 17 00:00:00 2001
From: Murphy Liang
Date: Wed, 11 Dec 2024 12:22:38 +0800
Subject: [PATCH 20/27] [c++] fix parallel_tree_learner_split_info (#6738)

Co-authored-by: Nikita Titov
Co-authored-by: shiyu1994
---
 src/treelearner/split_info.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/treelearner/split_info.hpp b/src/treelearner/split_info.hpp
index 234105eb9a34..8d33a6a76854 100644
--- a/src/treelearner/split_info.hpp
+++ b/src/treelearner/split_info.hpp
@@ -53,7 +53,7 @@ struct SplitInfo {
   bool default_left = true;
   int8_t monotone_type = 0;
   inline static int Size(int max_cat_threshold) {
-    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t);
+    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t) + sizeof(int64_t)*2;
   }

   inline void CopyTo(char* buffer) const {

From 53e0ddf7cd6eb281e3bec6273b19ff541c69bfa6 Mon Sep 17 00:00:00 2001
From: Scott Moser
Date: Wed, 11 Dec 2024 22:40:37 -0500
Subject: [PATCH 21/27] [R-package] Avoid bashisms (non-POSIX code) in
 R-package/configure (#6746)

---
 R-package/configure    | 8 ++++----
 R-package/configure.ac | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R-package/configure b/R-package/configure
index 11d691674f69..56a1fcc49105 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1789,7 +1789,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mmprefetch}" >&5
 printf "%s\n" "${ac_mmprefetch}" >&6; }
 if test "${ac_mmprefetch}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_PREFETCH=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1"
 fi

 ############
@@ -1824,7 +1824,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mm_malloc}" >&5
 printf "%s\n" "${ac_mm_malloc}" >&6; }
 if test "${ac_mm_malloc}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_MALLOC=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1"
 fi

 ##########
@@ -1850,11 +1850,11 @@ then
     # If Homebrew is found and libomp was installed with it, this code adds the necessary
     # flags for the compiler to find libomp headers and for the linker to find libomp.dylib.
     HOMEBREW_LIBOMP_PREFIX=""
-    if command -v brew &> /dev/null; then
+    if command -v brew >/dev/null 2>&1; then
         ac_brew_openmp=no
         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP was installed via Homebrew" >&5
 printf %s "checking whether OpenMP was installed via Homebrew... " >&6; }
-        brew --prefix libomp &>/dev/null && ac_brew_openmp=yes
+        brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes
         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_brew_openmp}" >&5
 printf "%s\n" "${ac_brew_openmp}" >&6; }
         if test "${ac_brew_openmp}" = yes; then
diff --git a/R-package/configure.ac b/R-package/configure.ac
index dad365be691c..d0f0462aef60 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -60,7 +60,7 @@ AC_LANG_CONFTEST(
 ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mmprefetch=yes
 AC_MSG_RESULT([${ac_mmprefetch}])
 if test "${ac_mmprefetch}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_PREFETCH=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1"
 fi

 ############
@@ -86,7 +86,7 @@ AC_LANG_CONFTEST(
 ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mm_malloc=yes
 AC_MSG_RESULT([${ac_mm_malloc}])
 if test "${ac_mm_malloc}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_MALLOC=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1"
 fi

 ##########
@@ -112,10 +112,10 @@ then
     # If Homebrew is found and libomp was installed with it, this code adds the necessary
     # flags for the compiler to find libomp headers and for the linker to find libomp.dylib.
     HOMEBREW_LIBOMP_PREFIX=""
-    if command -v brew &> /dev/null; then
+    if command -v brew >/dev/null 2>&1; then
         ac_brew_openmp=no
         AC_MSG_CHECKING([whether OpenMP was installed via Homebrew])
-        brew --prefix libomp &>/dev/null && ac_brew_openmp=yes
+        brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes
         AC_MSG_RESULT([${ac_brew_openmp}])
         if test "${ac_brew_openmp}" = yes; then
             HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`

From b33a12ea3883f306388e69f12ceb421b1ee7ec29 Mon Sep 17 00:00:00 2001
From: shiyu1994
Date: Sat, 14 Dec 2024 12:12:17 +0800
Subject: [PATCH 22/27] [fix] resolve potential attack in linker connection
 building (#6752)

---
 src/network/linkers_socket.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/network/linkers_socket.cpp b/src/network/linkers_socket.cpp
index 69e92a81b8eb..91d618bf1a2d 100644
--- a/src/network/linkers_socket.cpp
+++ b/src/network/linkers_socket.cpp
@@ -157,6 +157,9 @@ void Linkers::ListenThread(int incoming_cnt) {
     }
     int* ptr_in_rank = reinterpret_cast<int*>(buffer);
     int in_rank = *ptr_in_rank;
+    if (in_rank < 0 || in_rank >= num_machines_) {
+      Log::Fatal("Invalid rank %d found during initialization of linkers. The world size is %d.", in_rank, num_machines_);
+    }
     // add new socket
     SetLinker(in_rank, handler);
     ++connected_cnt;
From 1090a93b39e16f49621aa6824cd09d4390c3678a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Sat, 14 Dec 2024 23:45:10 -0600
Subject: [PATCH 23/27] [python-package] do not copy column-major numpy arrays
 when predicting (#6751)

---
 python-package/lightgbm/basic.py         |  7 ++-----
 tests/python_package_test/test_engine.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 1db55385af1b..0f2e3697f6ec 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -1291,10 +1291,7 @@ def __inner_predict_np2d(
         predict_type: int,
         preds: Optional[np.ndarray],
     ) -> Tuple[np.ndarray, int]:
-        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
-        else:  # change non-float data to float data, need to copy
-            data = np.array(mat.reshape(mat.size), dtype=np.float32)
+        data, layout = _np2d_to_np1d(mat)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         n_preds = self.__get_num_preds(
             start_iteration=start_iteration,
@@ -1314,7 +1311,7 @@ def __inner_predict_np2d(
             ctypes.c_int(type_ptr_data),
             ctypes.c_int32(mat.shape[0]),
             ctypes.c_int32(mat.shape[1]),
-            ctypes.c_int(_C_API_IS_ROW_MAJOR),
+            ctypes.c_int(layout),
             ctypes.c_int(predict_type),
             ctypes.c_int(start_iteration),
             ctypes.c_int(num_iteration),
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index cb2e893c9612..05afddb77c77 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -4611,3 +4611,18 @@ def test_bagging_by_query_in_lambdarank():
     ndcg_score_no_bagging_by_query = gbm_no_bagging_by_query.best_score["valid_0"]["ndcg@5"]
     assert ndcg_score_bagging_by_query >= ndcg_score - 0.1
     assert ndcg_score_no_bagging_by_query >= ndcg_score - 0.1
+
+
+def test_equal_predict_from_row_major_and_col_major_data():
+    X_row, y = make_synthetic_regression()
+    assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"]
+    ds = lgb.Dataset(X_row, y)
+    params = {"num_leaves": 8, "verbose": -1}
+    bst = lgb.train(params, ds, num_boost_round=5)
+    preds_row = bst.predict(X_row)
+
+    X_col = np.asfortranarray(X_row)
+    assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"]
+    preds_col = bst.predict(X_col)
+
+    np.testing.assert_allclose(preds_row, preds_col)
From c2f3807c73266b246a9aa74c670e4ab2940cde3e Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Sun, 15 Dec 2024 16:09:36 +0300
Subject: [PATCH 24/27] [ci] use Ruff linter instead of isort (#6755)

* Update append-comment.sh

* Update static_analysis.yml

* Update static_analysis.yml

* Update basic.py

* Update basic.py

* Update .pre-commit-config.yaml

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update pyproject.toml

* Update pyproject.toml

* Update pyproject.toml

* Update pyproject.toml

* Update interactive_plot_example.ipynb

* Update pyproject.toml

* Update append-comment.sh

* Update basic.py

* Update basic.py

* Update pyproject.toml

* Update .pre-commit-config.yaml

* Update basic.py

* Update basic.py

* Update test_basic.R

* Update rank_objective.hpp

* Update histogram_16_64_256.cu

* Update static_analysis.yml

* ensure alphabetical order of rules

---
 .pre-commit-config.yaml                      | 10 ++------
 R-package/tests/testthat/test_basic.R        |  2 +-
 .../notebooks/interactive_plot_example.ipynb |  2 +-
 python-package/lightgbm/basic.py             |  8 +++----
 python-package/pyproject.toml                | 24 ++++++++-----------
 src/objective/rank_objective.hpp             |  2 +-
 .../kernels/histogram_16_64_256.cu           |  2 +-
 7 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b334db19b8e7..0edab8df1be6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,15 +17,9 @@ repos:
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-        name: isort (python)
-        args: ["--settings-path", "python-package/pyproject.toml"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.7.0
+    rev: v0.8.3
     hooks:
       # Run the linter.
       - id: ruff
@@ -40,7 +34,7 @@ repos:
     hooks:
       - id: shellcheck
   - repo: https://github.com/crate-ci/typos
-    rev: v1.23.2
+    rev: v1.28.3
     hooks:
       - id: typos
         args: ["--force-exclude"]
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 7310815c4a6d..06d35a146d66 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -2345,7 +2345,7 @@ test_that("early stopping works with lgb.cv()", {
     # never changes, its first iteration was the best oone
     expect_equal(bst$best_iter, 1L)

-    # best_score should be taken from the first metri
+    # best_score should be taken from the first metric
    expect_equal(bst$best_score, 0.2)

    # early stopping should have happened, since constant_metric was the first
diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb
index cc8efa2c187b..a8abdf325d9d 100644
--- a/examples/python-guide/notebooks/interactive_plot_example.ipynb
+++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb
@@ -30,7 +30,7 @@
     "try:\n",
     "    # To enable interactive mode you should install ipywidgets\n",
     "    # https://github.com/jupyter-widgets/ipywidgets\n",
-    "    from ipywidgets import interact, SelectMultiple\n",
+    "    from ipywidgets import SelectMultiple, interact\n",
     "\n",
     "    INTERACTIVE = True\n",
     "except ImportError:\n",
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 0f2e3697f6ec..e06290dc1c5f 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2504,13 +2504,13 @@ def _compare_params_for_warning(
     compare_result : bool
         Returns whether two dictionaries with params are equal.
     """
-    for k in other_params:
+    for k, v in other_params.items():
         if k not in ignore_keys:
-            if k not in params or params[k] != other_params[k]:
+            if k not in params or params[k] != v:
                 return False
-    for k in params:
+    for k, v in params.items():
         if k not in ignore_keys:
-            if k not in other_params or params[k] != other_params[k]:
+            if k not in other_params or v != other_params[k]:
                 return False
     return True
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index 19866e01202b..8fcc85814db5 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -84,17 +84,6 @@ minimum-version = "build-system.requires"

 # end:build-system

-[tool.isort]
-include_trailing_comma = true
-line_length = 120
-# "vertical hanging indent", to match what ruff-format does
-# ref: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html#3-vertical-hanging-indent
-multi_line_output = 3
-skip_glob = [
-    "*/external_libs/*",
-    "*/lightgbm-python/*",
-]
-
 [tool.mypy]
 disallow_untyped_defs = true
 exclude = 'build/*|compile/*|docs/*|examples/*|external_libs/*|lightgbm-python/*|tests/*'
@@ -140,7 +129,7 @@ ignore = [
     "PLR1714",
     # (pylint) Magic value used in comparison
     "PLR2004",
-    # (pylint) for loop veriable overwritten by assignment target
+    # (pylint) for loop variable overwritten by assignment target
     "PLW2901",
     # (pylint) use 'elif' instead of 'else' then 'if', to reduce indentation
     "PLR5501"
@@ -152,10 +141,12 @@ select = [
     "C4",
     # pydocstyle
     "D",
-    # pycodestyle
+    # pycodestyle (errors)
     "E",
     # pyflakes
     "F",
+    # isort
+    "I",
     # NumPy-specific rules
     "NPY",
     # pylint
     "PL",
     # flake8-simplify
     "SIM401",
     # flake8-print
     "T",
+    # pycodestyle (warnings)
+    "W",
 ]

 [tool.ruff.lint.per-file-ignores]
 "docs/conf.py" = [
-    # (flake8-bugbear) raise exceptions with "raise ... from errr"
+    # (flake8-bugbear) raise exceptions with "raise ... from err"
     "B904",
     # (flake8-print) flake8-print
     "T"
 ]
@@ -196,3 +189,6 @@

 [tool.ruff.lint.pydocstyle]
 convention = "numpy"
+
+[tool.ruff.lint.isort]
+known-first-party = ["lightgbm"]
diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp
index ba8496ec4864..8227c7b65658 100644
--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -204,7 +204,7 @@ class LambdarankNDCG : public RankingObjective {
     }
     const double worst_score = score[sorted_idx[worst_idx]];
     double sum_lambdas = 0.0;
-    // start accmulate lambdas by pairs that contain at least one document above truncation level
+    // start accumulate lambdas by pairs that contain at least one document above truncation level
     for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
       if (score[sorted_idx[i]] == kMinScore) { continue; }
       for (data_size_t j = i + 1; j < cnt; ++j) {
diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu
index 59662fb19d55..9d8427a6f9a8 100644
--- a/src/treelearner/kernels/histogram_16_64_256.cu
+++ b/src/treelearner/kernels/histogram_16_64_256.cu
@@ -150,7 +150,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
   // size of threads that process this feature4
   const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);

-  // equavalent thread ID in this subgroup for this feature4
+  // equivalent thread ID in this subgroup for this feature4
   const unsigned int subglobal_tid = gtid - feature_id * subglobal_size;
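The `_compare_params_for_warning` rewrite in patch 24 above iterates with `dict.items()`, which avoids re-indexing the dictionary on every comparison. Its two-loop check is equivalent to comparing the two dicts with the ignored keys filtered out, as this small self-contained sketch shows; the function here is an illustrative re-implementation for clarity, not the library's code:

from typing import Any, Dict, Iterable


def compare_params_ignoring(
    params: Dict[str, Any],
    other_params: Dict[str, Any],
    ignore_keys: Iterable[str],
) -> bool:
    # equality over all keys except the ignored ones, in both directions
    ignored = set(ignore_keys)
    left = {k: v for k, v in params.items() if k not in ignored}
    right = {k: v for k, v in other_params.items() if k not in ignored}
    return left == right


assert compare_params_ignoring(
    {"num_leaves": 31, "seed": 1},
    {"num_leaves": 31, "seed": 2},
    ignore_keys={"seed"},
)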
From 31205fc8f816c677988f56f7699e78120a8f193c Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Sun, 15 Dec 2024 21:24:21 +0300
Subject: [PATCH 25/27] [ci] remove Docker volumes during Azure cleanup (#6760)

---
 .vsts-ci.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.vsts-ci.yml b/.vsts-ci.yml
index 3a111e10898e..40424840c82d 100644
--- a/.vsts-ci.yml
+++ b/.vsts-ci.yml
@@ -69,15 +69,17 @@ jobs:
       # check disk usage
       print-diagnostics
       # remove old containers, container images, volumes
-      # ref: https://stackoverflow.com/a/32723127/3986677)
+      # ref: https://stackoverflow.com/a/32723127/3986677
+      # ref: https://depot.dev/blog/docker-clear-cache#removing-everything-with-docker-system-prune
       echo "---- running 'docker system prune' ----"
       /tmp/docker system prune \
         --all \
         --force \
+        --volumes \
         --filter until=720h
       # check disk usage again
       print-diagnostics
-    displayName: clean
+    displayName: Clean
 ###########################################
 - job: Linux
 ###########################################

From 8eb3c3c625f6e5035a1da718d5fbd6c0bd0dcc9a Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sun, 15 Dec 2024 19:19:40 -0600
Subject: [PATCH 26/27] [ci] fix linkchecker job (#6757)

---
 .ci/test.sh         | 4 ++--
 docs/.linkcheckerrc | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.ci/test.sh b/.ci/test.sh
index cc8831f94c09..82c159064a33 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -146,8 +146,8 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
     make -C docs html || exit 1
     if [[ $TASK == "check-links" ]]; then
         # check docs for broken links
-        pip install linkchecker
-        linkchecker --config=.linkcheckerrc ./docs/_build/html/*.html || exit 1
+        pip install 'linkchecker>=10.5.0'
+        linkchecker --config=./docs/.linkcheckerrc ./docs/_build/html/*.html || exit 1
         exit 0
     fi
     # check the consistency of parameters' descriptions and other stuff
diff --git a/docs/.linkcheckerrc b/docs/.linkcheckerrc
index 003d8699a875..a4707aa536ea 100644
--- a/docs/.linkcheckerrc
+++ b/docs/.linkcheckerrc
@@ -1,9 +1,9 @@
 [checking]
-maxrequestspersecond=1
+maxrequestspersecond=0.1
 recursionlevel=1
 anchors=1
 sslverify=0
-threads=1
+threads=4

 [filtering]
 ignore=

From 480600b3afaf2a0a6f32cf417edf9567f625b2c3 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 10:45:06 -0600
Subject: [PATCH 27/27] [python-package] simplify eval result printing (#6749)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: José Morales
---
 python-package/lightgbm/callback.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index ae1e72c549d4..c64fb8ba755b 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -73,15 +73,13 @@ class CallbackEnv:

 def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
     """Format metric string."""
-    if len(value) == 4:
-        return f"{value[0]}'s {value[1]}: {value[2]:g}"
-    elif len(value) == 5:
-        if show_stdv:
-            return f"{value[0]}'s {value[1]}: {value[2]:g} + {value[4]:g}"  # type: ignore[misc]
-        else:
-            return f"{value[0]}'s {value[1]}: {value[2]:g}"
-    else:
-        raise ValueError("Wrong metric value")
+    dataset_name, metric_name, metric_value, *_ = value
+    out = f"{dataset_name}'s {metric_name}: {metric_value:g}"
+    # tuples from cv() sometimes have a 5th item, with standard deviation of
+    # the evaluation metric (taken over all cross-validation folds)
+    if show_stdv and len(value) == 5:
+        out += f" + {value[4]:g}"
+    return out


 class _LogEvaluationCallback:
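The simplification in patch 27 leans on Python's starred unpacking, which accepts tuples of any length of at least three elements without branching on `len()`. A standalone sketch of the same pattern; the tuple values below are hypothetical examples, while the real tuples come from LightGBM's training and cv() loops:

from typing import Tuple, Union

# the two tuple shapes the formatter must accept:
# (dataset_name, metric_name, value, is_higher_better[, stdv])
EvalResult = Union[
    Tuple[str, str, float, bool],
    Tuple[str, str, float, bool, float],
]


def format_eval_result(value: EvalResult, show_stdv: bool) -> str:
    # starred unpacking tolerates both the 4-tuple and the 5-tuple
    dataset_name, metric_name, metric_value, *_ = value
    out = f"{dataset_name}'s {metric_name}: {metric_value:g}"
    if show_stdv and len(value) == 5:
        out += f" + {value[4]:g}"
    return out


print(format_eval_result(("valid_0", "l2", 0.25, True), show_stdv=True))
# valid_0's l2: 0.25
print(format_eval_result(("cv_agg", "l2", 0.25, True, 0.02), show_stdv=True))
# cv_agg's l2: 0.25 + 0.02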