From 81ceaec08d7dac36fa5067241bf67ef111c36953 Mon Sep 17 00:00:00 2001
From: Jirair Aroyan <165020043+JAroyan@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:59:32 +0200
Subject: [PATCH 01/27] [docs] Remove only cpu note due to gpu support for linear trees (#6686)

* Remove only cpu note

* Change Note for linear tree

---------

Co-authored-by: Nikita Titov
Co-authored-by: James Lamb
---
 docs/Parameters.rst       | 2 +-
 include/LightGBM/config.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index e5cfaf2dc560..1f80a13d5731 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -795,7 +795,7 @@ Dataset Parameters
 
   - it is recommended to rescale data before training so that features have similar mean and standard deviation
 
-  - **Note**: works only with ``cpu`` device type and ``serial`` tree learner
+  - **Note**: works only with ``cpu`` and ``gpu`` device types and ``serial`` tree learner
 
   - **Note**: ``regression_l1`` objective is not supported with linear tree boosting
 
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 4a73c30f9001..d5b56f0fd1fb 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -664,7 +664,7 @@ struct Config {
   // desc = categorical features are used for splits as normal but are not used in the linear models
   // desc = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
   // desc = it is recommended to rescale data before training so that features have similar mean and standard deviation
-  // desc = **Note**: works only with ``cpu`` device type and ``serial`` tree learner
+  // desc = **Note**: works only with ``cpu`` and ``gpu`` device types and ``serial`` tree learner
   // desc = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
   // desc = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
   // desc = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves

From c9d1ac7beac4426c8e636a392bde0f995d1ae8fb Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 26 Oct 2024 12:31:39 -0500
Subject: [PATCH 02/27] [python-package] remove MSVS solution files from sdist (#6698)

* [python-package] remove MSVS solution files from sdist

* remove one more line

---------

Co-authored-by: Nikita Titov
---
 build-python.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build-python.sh b/build-python.sh
index cf790737729e..ff37e4afe225 100755
--- a/build-python.sh
+++ b/build-python.sh
@@ -205,7 +205,6 @@ create_isolated_source_dir() {
     cp -R ./include ./lightgbm-python
     cp -R ./src ./lightgbm-python
     cp -R ./swig ./lightgbm-python
-    cp -R ./windows ./lightgbm-python
 
     # include only specific files from external_libs, to keep the package
     # small and avoid redistributing code with licenses incompatible with
@@ -303,8 +302,7 @@ if test "${INSTALL}" = true; then
         ./external_libs \
         ./include \
         ./src \
-        ./swig \
-        ./windows
+        ./swig
     # use regular-old setuptools for these builds, to avoid
     # trying to recompile the shared library
     sed -i.bak -e '/start:build-system/,/end:build-system/d' pyproject.toml

From 9b351e6ead39c4274ee7a9a6f3c1acf6a77bd2ce Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Tue, 29 Oct 2024 12:43:12 +0300
Subject: [PATCH 03/27] [ci] Fix version matching between RTD pages and R-package pages (#6673)

* Update script.js

* Update script.js

* Update script.js

* Update script.js

* replace url at build time

* manipulate with raw files
---
 docs/_static/js/script.js | 17 ++---------------
 docs/conf.py              | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js
index 107a6a4969a3..3cfc90de887d 100644
--- a/docs/_static/js/script.js
+++ b/docs/_static/js/script.js
@@ -4,25 +4,12 @@ $(function() {
     /* List each class property item on a new line
        https://github.com/microsoft/LightGBM/issues/5073 */
-    if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') != -1) {
+    if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') !== -1) {
         $('.py.property').each(function() {
             this.style.setProperty('display', 'inline', 'important');
         });
     }
 
-    /* Point to the same version of R API as the current docs version */
-    var current_version_elems = $('.rst-current-version');
-    if(current_version_elems.length !== 0) {
-        var current_version = $(current_version_elems[0]).contents().filter(function() {
-            return this.nodeType == 3;
-        }).text().trim().split(' ').pop();
-        if(current_version !== 'latest') {
-            $('a.reference.external[href$="/latest/R/reference/"]').each(function() {
-                $(this).attr('href', function (_, val) { return val.replace('/latest/', '/' + current_version + '/'); });
-            });
-        }
-    }
-
-    /* Collapse specified sections in the installation guide */
-    if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') != -1) {
+    /* Collapse specified sections in the installation guide */
+    if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') !== -1) {
         $('').appendTo('body');
         var collapsable = [
             '#build-threadless-version-not-recommended',
diff --git a/docs/conf.py b/docs/conf.py
index f8bd29a69922..256787bf7f8d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,6 +39,7 @@
 sys.path.insert(0, str(LIB_PATH))
 
 INTERNAL_REF_REGEX = compile(r"(?P<begin>\.\/.+)(?P<extension>\.rst)(?P<end>$|#)")
+RTD_R_REF_REGEX = compile(r"(?P<begin>https://.+/)(?P<version>latest)(?P<end>/R/reference/)")
 
 
 class InternalRefTransform(Transform):
@@ -69,6 +70,7 @@ def run(self) -> List:
 os.environ["LIGHTGBM_BUILD_DOC"] = "1"
 C_API = os.environ.get("C_API", "").lower().strip() != "no"
 RTD = bool(os.environ.get("READTHEDOCS", ""))
+RTD_VERSION = os.environ.get("READTHEDOCS_VERSION", "stable")
 
 # If your documentation needs a minimal Sphinx version, state it here.
 needs_sphinx = "2.1.0"  # Due to sphinx.ext.napoleon, autodoc_typehints
@@ -309,6 +311,22 @@ def generate_r_docs(app: Sphinx) -> None:
         raise Exception(f"An error has occurred while generating documentation for R-package\n{e}")
 
 
+def replace_reference_to_r_docs(app: Sphinx) -> None:
+    """Make reference to R-package documentation point to the actual version.
+
+    Parameters
+    ----------
+    app : sphinx.application.Sphinx
+        The application object representing the Sphinx process.
+    """
+    index_doc_path = CURR_PATH / "index.rst"
+    with open(index_doc_path, "r+t", encoding="utf-8") as index_doc:
+        content = index_doc.read()
+        content = RTD_R_REF_REGEX.sub(rf"\g<begin>{RTD_VERSION}\g<end>", content)
+        index_doc.seek(0)
+        index_doc.write(content)
+
+
 def setup(app: Sphinx) -> None:
     """Add new elements at Sphinx initialization time.
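# A minimal sketch of the substitution performed by replace_reference_to_r_docs()
# above, assuming the regex group names shown there (begin/version/end are
# reconstructions; the original markup was stripped) and a hypothetical
# RTD_VERSION of "v4.5.0"; the URL is only an example of an R-package reference link:
from re import compile

RTD_R_REF_REGEX = compile(r"(?P<begin>https://.+/)(?P<version>latest)(?P<end>/R/reference/)")
print(RTD_R_REF_REGEX.sub(r"\g<begin>v4.5.0\g<end>", "https://lightgbm.readthedocs.io/en/latest/R/reference/"))
# prints: https://lightgbm.readthedocs.io/en/v4.5.0/R/reference/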
@@ -330,6 +348,7 @@ def setup(app: Sphinx) -> None: app.connect( "build-finished", lambda app, _: copytree(CURR_PATH.parent / "lightgbm_r" / "docs", Path(app.outdir) / "R") ) + app.connect("builder-inited", replace_reference_to_r_docs) app.add_transform(InternalRefTransform) add_js_file = getattr(app, "add_js_file", False) or app.add_javascript add_js_file("js/script.js") From 4a60a53c38da1356effa65b4d7bee720167d0fc5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Oct 2024 05:47:25 -0500 Subject: [PATCH 04/27] [ci] [R-package] remove code for 'rchk' checks (#6545) * [ci] [R-package] re-enable 'rchk' checks * comment out most CI * it would be helpful to actually enable rchk * remove all Rf_unprotect() calls to try to trigger failure * try removing some Rf_protect() calls * revert CI changes * just remove rchk * revert whitespace changes --- .ci/setup.sh | 2 +- .ci/test-r-package.sh | 29 +---------------------------- .ci/test.sh | 2 +- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/.ci/setup.sh b/.ci/setup.sh index e551b1683aef..30d564b2d5f4 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -142,7 +142,7 @@ else # Linux fi fi -if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then +if [[ "${TASK}" != "r-package" ]]; then if [[ $SETUP_CONDA != "false" ]]; then curl \ -sL \ diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index 7d821676bb71..ae205213d787 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -125,12 +125,7 @@ Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/A # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) -# NOTE: testthat is not required when running rchk -if [[ "${TASK}" == "r-rchk" ]]; then - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl')" -else - packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" -fi +packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then packages+=", type = 'binary'" @@ -166,28 +161,6 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then ./build-cran-package.sh || exit 1 - if [[ "${TASK}" == "r-rchk" ]]; then - echo "Checking R-package with rchk" - mkdir -p packages - cp "${PKG_TARBALL}" packages - RCHK_LOG_FILE="rchk-logs.txt" - docker run \ - -v "$(pwd)/packages:/rchk/packages" \ - kalibera/rchk:latest \ - "/rchk/packages/${PKG_TARBALL}" \ - > "${RCHK_LOG_FILE}" 2>&1 \ - || (cat ${RCHK_LOG_FILE} && exit 1) - cat ${RCHK_LOG_FILE} - - # the exceptions below are from R itself and not LightGBM: - # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 - exit "$( - grep "${RCHK_LOG_FILE}" -v "in function strptime_internal" \ - | grep -v "in function RunGenCollect" \ - | grep --count -E '\[PB\]|ERROR' - )" - fi - # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM # git repo around. 
This is to protect against the use of relative paths diff --git a/.ci/test.sh b/.ci/test.sh index 4bf44140dbfd..2fc7820a643d 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -42,7 +42,7 @@ else export MACOSX_DEPLOYMENT_TARGET=12.0 fi -if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then +if [[ "${TASK}" == "r-package" ]]; then bash "${BUILD_DIRECTORY}/.ci/test-r-package.sh" || exit 1 exit 0 fi From dc0ed538aa09b755ba4a293dc6f344da51674260 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 30 Oct 2024 01:49:43 +0300 Subject: [PATCH 05/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 1) (#6704) * introdure PSScriptAnalyzer * revert workflow * run PSScriptAnalyzer before conda installation --- .ci/lint-powershell.ps1 | 56 ++++++++++++++++++++ .ci/lint-r-code.R | 1 - .ci/test-r-package-windows.ps1 | 97 ++++++++++++++++++---------------- .ci/test-windows.ps1 | 60 ++++++++++----------- .ci/test.sh | 3 ++ .editorconfig | 2 +- 6 files changed, 140 insertions(+), 79 deletions(-) create mode 100644 .ci/lint-powershell.ps1 diff --git a/.ci/lint-powershell.ps1 b/.ci/lint-powershell.ps1 new file mode 100644 index 000000000000..b2e045917ab6 --- /dev/null +++ b/.ci/lint-powershell.ps1 @@ -0,0 +1,56 @@ +$settings = @{ + Severity = @( + 'Information', + 'Warning', + 'Error' + ) + IncludeDefaultRules = $true + # Additional rules that are disabled by default + Rules = @{ + PSAvoidExclaimOperator = @{ + Enable = $true + } + PSAvoidLongLines = @{ + Enable = $true + MaximumLineLength = 120 + } + PSAvoidSemicolonsAsLineTerminators = @{ + Enable = $true + } + PSPlaceCloseBrace = @{ + Enable = $true + NoEmptyLineBefore = $true + IgnoreOneLineBlock = $true + NewLineAfter = $false + } + PSPlaceOpenBrace = @{ + Enable = $true + OnSameLine = $true + NewLineAfter = $true + IgnoreOneLineBlock = $true + } + PSUseConsistentIndentation = @{ + Enable = $true + IndentationSize = 4 + PipelineIndentation = 'IncreaseIndentationAfterEveryPipeline' + Kind = 'space' + } + PSUseConsistentWhitespace = @{ + Enable = $true + CheckInnerBrace = $true + CheckOpenBrace = $true + CheckOpenParen = $true + CheckOperator = $true + CheckSeparator = $true + CheckPipe = $true + CheckPipeForRedundantWhitespace = $true + CheckParameter = $true + IgnoreAssignmentOperatorInsideHashTable = $false + } + PSUseCorrectCasing = @{ + Enable = $true + } + } +} + +Invoke-ScriptAnalyzer -Path "$env:BUILD_DIRECTORY/.ci" -Recurse -EnableExit -Settings $settings diff --git a/.ci/lint-r-code.R b/.ci/lint-r-code.R index 8de09c0ff1ac..9eae00aa5d49 100755 --- a/.ci/lint-r-code.R +++ b/.ci/lint-r-code.R @@ -1,4 +1,3 @@ - loadNamespace("lintr") args <- commandArgs( diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 269695c51462..57055db1a69f 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -1,16 +1,16 @@ # Download a file and retry upon failure. 
This looks like # an infinite loop but CI-level timeouts will kill it -function Download-File-With-Retries { - param( - [string]$url, - [string]$destfile - ) - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - do { - Write-Output "Downloading ${url}" - sleep 5; - Invoke-WebRequest -Uri $url -OutFile $destfile - } while(!$?); +function Get-File-With-Tenacity { + param( + [Parameter(Mandatory = $true)][string]$url, + [Parameter(Mandatory = $true)][string]$destfile + ) + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + do { + Write-Output "Downloading ${url}" + sleep 5 + Invoke-WebRequest -Uri $url -OutFile $destfile + } while (-not $?) } # External utilities like R.exe / Rscript.exe writing to stderr (even for harmless @@ -20,20 +20,23 @@ function Download-File-With-Retries { # Using standard PowerShell redirection does not work to avoid these errors. # This function uses R's built-in redirection mechanism, sink(). Any place where # this function is used is a command that writes harmless messages to stderr -function Run-R-Code-Redirect-Stderr { - param( - [string]$rcode - ) - $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" - Rscript --vanilla -e $decorated_code +function Invoke-R-Code-Redirect-Stderr { + param( + [Parameter(Mandatory = $true)][string]$rcode + ) + $decorated_code = "out_file <- file(tempfile(), open = 'wt'); sink(out_file, type = 'message'); $rcode; sink()" + Rscript --vanilla -e $decorated_code } # Remove all items matching some pattern from PATH environment variable function Remove-From-Path { - param( - [string]$pattern_to_remove - ) - $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + [CmdletBinding(SupportsShouldProcess)] + param( + [Parameter(Mandatory = $true)][string]$pattern_to_remove + ) + if ($PSCmdlet.ShouldProcess($env:PATH, "Removing ${pattern_to_remove}")) { + $env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -notmatch "$pattern_to_remove" }) -join ';' + } } # remove some details that exist in the GitHub Actions images which might @@ -87,7 +90,7 @@ if ($env:R_MAJOR_VERSION -eq "3") { $env:R_WINDOWS_VERSION = "4.3.1" } else { Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" - Check-Output $false + Assert-Output $false } $env:CMAKE_VERSION = "3.30.0" @@ -120,29 +123,29 @@ tzutil /s "GMT Standard Time" # download R, RTools and CMake Write-Output "Downloading R, Rtools and CMake" -Download-File-With-Retries -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" -Download-File-With-Retries -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" -Download-File-With-Retries -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" +Get-File-With-Tenacity -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" +Get-File-With-Tenacity -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" +Get-File-With-Tenacity -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" # Install R Write-Output 
"Installing R" -Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Check-Output $? +Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Assert-Output $? Write-Output "Done installing R" Write-Output "Installing Rtools" -Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Check-Output $? +Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Assert-Output $? Write-Output "Done installing Rtools" Write-Output "Installing CMake" Add-Type -AssemblyName System.IO.Compression.FileSystem -[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Check-Output $? +[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $? # Remove old CMake shiped with RTools Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" Write-Output "Installing dependencies" $packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" -Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $? +Invoke-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Assert-Output $? Write-Output "Building R-package" @@ -163,9 +166,9 @@ if ($env:COMPILER -ne "MSVC") { $env:BUILD_R_FLAGS = "'--skip-install'" } else { Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" - Check-Output $false + Assert-Output $false } - Run-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Check-Output $? + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? } elseif ($env:R_BUILD_TYPE -eq "cran") { # NOTE: gzip and tar are needed to create a CRAN package on Windows, but # some flavors of tar.exe can fail in some settings on Windows. @@ -174,7 +177,7 @@ if ($env:COMPILER -ne "MSVC") { if ($env:R_MAJOR_VERSION -eq "3") { $env:PATH = "C:\msys64\usr\bin;" + $env:PATH } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Check-Output $? + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? Remove-From-Path ".*msys64.*" # Test CRAN source .tar.gz in a directory that is not this repo or below it. 
# When people install.packages('lightgbm'), they won't have the LightGBM @@ -193,31 +196,31 @@ if ($env:COMPILER -ne "MSVC") { } else { $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } - Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? Write-Output "R CMD check build logs:" $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Check-Output $check_succeeded + Assert-Output $check_succeeded Write-Output "Looking for issues with R CMD check results" if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" - Check-Output $False + Assert-Output $False } } else { $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" - Run-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? Write-Output "----- build and install logs -----" Get-Content -Path "$INSTALL_LOG_FILE_NAME" Write-Output "----- end of build and install logs -----" - Check-Output $install_succeeded + Assert-Output $install_succeeded # some errors are not raised above, but can be found in the logs if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { echo "ERRORs have been found installing lightgbm" - Check-Output $False + Assert-Output $False } } @@ -231,7 +234,7 @@ if ($env:TOOLCHAIN -ne "MSVC") { } if ($checks_cnt -eq 0) { Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that we actually got the expected compiler. The R-package has some logic @@ -241,7 +244,7 @@ if ($env:R_BUILD_TYPE -eq "cmake") { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" if ($checks.Matches.length -eq 0) { Write-Output "The wrong compiler was used. Check the build logs." - Check-Output $False + Assert-Output $False } } @@ -251,7 +254,7 @@ if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" if ($checks.Matches.length -eq 0) { Write-Output "The wrong toolchain was used. Check the build logs." - Check-Output $False + Assert-Output $False } } @@ -267,7 +270,7 @@ if ($env:R_BUILD_TYPE -eq "cran") { } if ($checks_cnt -eq 0) { Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that MM_MALLOC preprocessor definition is actually used in CI builds. @@ -282,7 +285,7 @@ if ($env:R_BUILD_TYPE -eq "cran") { } if ($checks_cnt -eq 0) { Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." - Check-Output $False + Assert-Output $False } # Checking that OpenMP is actually used in CMake builds. 
@@ -290,17 +293,17 @@ if ($env:R_BUILD_TYPE -eq "cmake") { $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" if ($checks.Matches.length -eq 0) { Write-Output "OpenMP wasn't found. Check the build logs." - Check-Output $False + Assert-Output $False } } if ($env:COMPILER -eq "MSVC") { Write-Output "Running tests with testthat.R" cd R-package/tests - # NOTE: using Rscript.exe intentionally here, instead of Run-R-Code-Redirect-Stderr, - # because something about the interaction between Run-R-Code-Redirect-Stderr + # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, + # because something about the interaction between Invoke-R-Code-Redirect-Stderr # and testthat results in failing tests not exiting with a non-0 exit code. - Rscript.exe --vanilla "testthat.R" ; Check-Output $? + Rscript.exe --vanilla "testthat.R" ; Assert-Output $? } Write-Output "No issues were found checking the R-package" diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index a2c498531262..87c214856212 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -1,9 +1,9 @@ -function Check-Output { - param( [bool]$success ) - if (!$success) { - $host.SetShouldExit(-1) - exit 1 - } +function Assert-Output { + param( [Parameter(Mandatory = $true)][bool]$success ) + if (-not $success) { + $host.SetShouldExit(-1) + exit 1 + } } $env:CONDA_ENV = "test-env" @@ -17,14 +17,14 @@ Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore [Void][System.IO.Directory]::CreateDirectory($env:TMPDIR) if ($env:TASK -eq "r-package") { - & .\.ci\test-r-package-windows.ps1 ; Check-Output $? + & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? Exit 0 } if ($env:TASK -eq "cpp-tests") { cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 - cmake --build build --target testlightgbm --config Debug ; Check-Output $? - .\Debug\testlightgbm.exe ; Check-Output $? + cmake --build build --target testlightgbm --config Debug ; Assert-Output $? + .\Debug\testlightgbm.exe ; Assert-Output $? Exit 0 } @@ -33,23 +33,23 @@ if ($env:TASK -eq "swig") { $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Check-Output $? + [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? Write-Output "CMake build logs:" Get-Content -Path "$BuildLogFileName" - Check-Output $build_succeeded + Assert-Output $build_succeeded $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" $checks_cnt = $checks.Matches.length if ($checks_cnt -eq 0) { Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." 
- Check-Output $False + Assert-Output $False } - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? if ($env:AZURE -eq "true") { - cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $? + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? } Exit 0 } @@ -72,7 +72,7 @@ conda create ` -y ` -n $env:CONDA_ENV ` --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Check-Output $? + "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? if ($env:TASK -ne "bdist") { conda activate $env:CONDA_ENV @@ -80,37 +80,37 @@ if ($env:TASK -ne "bdist") { cd $env:BUILD_SOURCESDIRECTORY if ($env:TASK -eq "regular") { - cmake -B build -S . -A x64 ; Check-Output $? - cmake --build build --target ALL_BUILD --config Release ; Check-Output $? - sh ./build-python.sh install --precompile ; Check-Output $? + cmake -B build -S . -A x64 ; Assert-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + sh ./build-python.sh install --precompile ; Assert-Output $? cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif ($env:TASK -eq "sdist") { - sh ./build-python.sh sdist ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $? + sh ./build-python.sh sdist ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? } elseif ($env:TASK -eq "bdist") { # Import the Chocolatey profile module so that the RefreshEnv command # invoked below properly updates the current PowerShell session environment. $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Import-Module "$module" ; Check-Output $? + Import-Module "$module" ; Assert-Output $? RefreshEnv Write-Output "Current OpenCL drivers:" Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors conda activate $env:CONDA_ENV - sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Check-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $? + sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { if ($env:COMPILER -eq "MINGW") { - sh ./build-python.sh install --mingw ; Check-Output $? + sh ./build-python.sh install --mingw ; Assert-Output $? } else { - sh ./build-python.sh install; Check-Output $? + sh ./build-python.sh install; Assert-Output $? } } @@ -125,7 +125,7 @@ if ($env:TASK -eq "bdist") { $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" } -pytest $tests ; Check-Output $? +pytest $tests ; Assert-Output $? 
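# The helper renames running through this patch and patch 07 (Check-Output ->
# Assert-Output, Download-File-With-Retries -> Get-File-With-Tenacity,
# Run-R-Code-Redirect-Stderr -> Invoke-R-Code-Redirect-Stderr) line up with
# PowerShell's approved-verb convention, which PSScriptAnalyzer checks via its
# PSUseApprovedVerbs rule. A small sketch, assuming only the built-in Get-Verb
# cmdlet (the list of verbs queried below is an illustrative choice):
Get-Verb | Where-Object { $_.Verb -in @('Assert', 'Get', 'Invoke', 'Check') } | Select-Object Verb, Group
# 'Check' never appears in the output because it is not an approved verb, which
# is why functions named Check-* would be flagged by the linter.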
if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide @@ -134,9 +134,9 @@ if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -e conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" foreach ($file in @(Get-ChildItem *.py)) { @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file ; Check-Output $? + python $file ; Assert-Output $? } # run all examples cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Check-Output $? # run all notebooks + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks } diff --git a/.ci/test.sh b/.ci/test.sh index 2fc7820a643d..9b3e1ee3938d 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -98,6 +98,9 @@ if [[ $TASK == "swig" ]]; then fi if [[ $TASK == "lint" ]]; then + pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" + echo "Linting PowerShell code" + pwsh -file "./.ci/lint-powershell.ps1" || exit 0 conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'cmakelint>=1.4.3' \ diff --git a/.editorconfig b/.editorconfig index f7bd94f4f905..f4ae446b64bb 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,7 +7,7 @@ insert_final_newline = true indent_style = space indent_size = 2 -[*.{py,sh,js}] +[*.{py,sh,js,ps1}] indent_size = 4 line_length = 120 skip = external_libs From 8d5dca2e3a6181ba788f8aa3d7a69d08e9d0ea07 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 31 Oct 2024 08:48:47 -0500 Subject: [PATCH 06/27] [python-package] remove support for passing 'feature_name' and 'categorical_feature' through train() and cv() (#6706) --- python-package/lightgbm/engine.py | 69 +----------------------- tests/python_package_test/test_engine.py | 51 +++++++++++------- 2 files changed, 33 insertions(+), 87 deletions(-) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 89910599b0ca..dca6b607cdc7 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -3,7 +3,6 @@ import copy import json -import warnings from collections import OrderedDict, defaultdict from operator import attrgetter from pathlib import Path @@ -15,17 +14,14 @@ from .basic import ( Booster, Dataset, - LGBMDeprecationWarning, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _LGBM_BoosterEvalMethodResultType, _LGBM_BoosterEvalMethodResultWithStandardDeviationType, - _LGBM_CategoricalFeatureConfiguration, _LGBM_CustomObjectiveFunction, _LGBM_EvalFunctionResultType, - _LGBM_FeatureNameConfiguration, _log_warning, ) from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold @@ -54,15 +50,6 @@ ] -def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None: - msg = ( - f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in " - f"a future release. 
Set '{argname}' when calling lightgbm.Dataset() instead. " - "See https://github.com/microsoft/LightGBM/issues/6435." - ) - warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2) - - def _choose_num_iterations(num_boost_round_kwarg: int, params: Dict[str, Any]) -> Dict[str, Any]: """Choose number of boosting rounds. @@ -127,8 +114,6 @@ def train( valid_names: Optional[List[str]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", keep_training_booster: bool = False, callbacks: Optional[List[Callable]] = None, ) -> Booster: @@ -170,21 +155,6 @@ def train( set the ``metric`` parameter to the string ``"None"`` in ``params``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. keep_training_booster : bool, optional (default=False) Whether the returned Booster will be used to keep training. If False, the returned value will be converted into _InnerPredictor before returning. @@ -233,13 +203,6 @@ def train( f"Item {i} has type '{type(valid_item).__name__}'." 
) - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("train", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("train", "feature_name") - # create predictor first params = copy.deepcopy(params) params = _choose_param_value( @@ -278,9 +241,7 @@ def train( else: init_iteration = 0 - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) is_valid_contain_train = False train_data_name = "training" @@ -642,8 +603,6 @@ def cv( metrics: Optional[Union[str, List[str]]] = None, feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None, init_model: Optional[Union[str, Path, Booster]] = None, - feature_name: _LGBM_FeatureNameConfiguration = "auto", - categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto", fpreproc: Optional[_LGBM_PreprocFunction] = None, seed: int = 0, callbacks: Optional[List[Callable]] = None, @@ -699,21 +658,6 @@ def cv( set ``metrics`` to the string ``"None"``. init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of str, or 'auto', optional (default="auto") - **Deprecated.** Set ``feature_name`` on ``train_set`` instead. - Feature names. - If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of str or int, or 'auto', optional (default="auto") - **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead. - Categorical features. - If list of int, interpreted as indices. - If list of str, interpreted as feature names (need to specify ``feature_name`` as well). - If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. - All values in categorical features will be cast to int32 and thus should be less than int32 max value (2147483647). - Large values could be memory consuming. Consider using consecutive integers starting from zero. - All negative values in categorical features will be treated as missing values. - The output cannot be monotonically constrained with respect to a categorical feature. - Floating point numbers in categorical features will be rounded towards 0. fpreproc : callable or None, optional (default=None) Preprocessing function that takes (dtrain, dtest, params) and returns transformed versions of those. 
@@ -767,13 +711,6 @@ def cv( if not isinstance(train_set, Dataset): raise TypeError(f"cv() only accepts Dataset object, train_set has type '{type(train_set).__name__}'.") - # raise deprecation warnings if necessary - # ref: https://github.com/microsoft/LightGBM/issues/6435 - if categorical_feature != "auto": - _emit_dataset_kwarg_warning("cv", "categorical_feature") - if feature_name != "auto": - _emit_dataset_kwarg_warning("cv", "feature_name") - params = copy.deepcopy(params) params = _choose_param_value( main_param_name="objective", @@ -818,9 +755,7 @@ def cv( params.pop(metric_alias, None) params["metric"] = metrics - train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature( - categorical_feature - ) + train_set._update_params(params)._set_predictor(predictor) results = defaultdict(list) cvfolds = _make_n_folds( diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 286f066a3526..9ae471e7f4b9 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1459,7 +1459,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): ] ) y = rng.uniform(size=(100,)) - ds = lgb.Dataset(X, y) + ds = lgb.Dataset(X, y, categorical_feature=[1, 2]) params = { "bagging_fraction": 0.8, "bagging_freq": 2, @@ -1474,7 +1474,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng): "verbosity": 0, } model_file = tmp_path / "model.txt" - orig_bst = lgb.train(params, ds, num_boost_round=1, categorical_feature=[1, 2]) + orig_bst = lgb.train(params, ds, num_boost_round=1) orig_bst.save_model(model_file) with model_file.open("rt") as f: model_contents = f.readlines() @@ -1746,16 +1746,18 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): gbm0 = lgb.train(params, lgb_train, num_boost_round=10) pred0 = gbm0.predict(X_test) assert lgb_train.categorical_feature == "auto" - lgb_train = lgb.Dataset(X, pd.DataFrame(y)) # also test that label can be one-column pd.DataFrame - gbm1 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[0]) + lgb_train = lgb.Dataset( + X, pd.DataFrame(y), categorical_feature=[0] + ) # also test that label can be one-column pd.DataFrame + gbm1 = lgb.train(params, lgb_train, num_boost_round=10) pred1 = gbm1.predict(X_test) assert lgb_train.categorical_feature == [0] - lgb_train = lgb.Dataset(X, pd.Series(y)) # also test that label can be pd.Series - gbm2 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A"]) + lgb_train = lgb.Dataset(X, pd.Series(y), categorical_feature=["A"]) # also test that label can be pd.Series + gbm2 = lgb.train(params, lgb_train, num_boost_round=10) pred2 = gbm2.predict(X_test) assert lgb_train.categorical_feature == ["A"] - lgb_train = lgb.Dataset(X, y) - gbm3 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D"]) + lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D"]) + gbm3 = lgb.train(params, lgb_train, num_boost_round=10) pred3 = gbm3.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D"] categorical_model_path = tmp_path / "categorical.model" @@ -1767,12 +1769,12 @@ def test_pandas_categorical(rng_fixed_seed, tmp_path): pred5 = gbm4.predict(X_test) gbm5 = lgb.Booster(model_str=model_str) pred6 = gbm5.predict(X_test) - lgb_train = lgb.Dataset(X, y) - gbm6 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D", "E"]) + 
lgb_train = lgb.Dataset(X, y, categorical_feature=["A", "B", "C", "D", "E"]) + gbm6 = lgb.train(params, lgb_train, num_boost_round=10) pred7 = gbm6.predict(X_test) assert lgb_train.categorical_feature == ["A", "B", "C", "D", "E"] - lgb_train = lgb.Dataset(X, y) - gbm7 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[]) + lgb_train = lgb.Dataset(X, y, categorical_feature=[]) + gbm7 = lgb.train(params, lgb_train, num_boost_round=10) pred8 = gbm7.predict(X_test) assert lgb_train.categorical_feature == [] with pytest.raises(AssertionError): @@ -3672,12 +3674,11 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test with a categorical feature x[:250, 0] = 0 y[:250] += 10 - lgb_train = lgb.Dataset(x, label=y) + lgb_train = lgb.Dataset(x, label=y, categorical_feature=[0]) est = lgb.train( dict(params, linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train, num_boost_round=10, - categorical_feature=[0], ) # test refit: same results on same data est2 = est.refit(x, label=y) @@ -3700,10 +3701,20 @@ def test_linear_trees(tmp_path, rng_fixed_seed): # test when num_leaves - 1 < num_features and when num_leaves - 1 > num_features X_train, _, y_train, _ = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) params = {"linear_tree": True, "verbose": -1, "metric": "mse", "seed": 0} - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=2)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) - train_data = lgb.Dataset(X_train, label=y_train, params=dict(params, num_leaves=60)) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=2), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) + train_data = lgb.Dataset( + X_train, + label=y_train, + params=dict(params, num_leaves=60), + categorical_feature=[0], + ) + est = lgb.train(params, train_data, num_boost_round=10) def test_save_and_load_linear(tmp_path): @@ -3714,8 +3725,8 @@ def test_save_and_load_linear(tmp_path): X_train[: X_train.shape[0] // 2, 0] = 0 y_train[: X_train.shape[0] // 2] = 1 params = {"linear_tree": True} - train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) - est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params, categorical_feature=[0]) + est_1 = lgb.train(params, train_data_1, num_boost_round=10) pred_1 = est_1.predict(X_train) tmp_dataset = str(tmp_path / "temp_dataset.bin") From 92aa07b4b827d020d9aeeeddec0b3416042c9d2a Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 1 Nov 2024 07:09:16 +0300 Subject: [PATCH 07/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 2) (#6709) --- .ci/install-opencl.ps1 | 28 ++-- .ci/test-r-package-windows.ps1 | 242 ++++++++++++++++----------------- .ci/test-windows.ps1 | 156 ++++++++++----------- .ci/test.sh | 2 +- 4 files changed, 214 insertions(+), 214 deletions(-) diff --git a/.ci/install-opencl.ps1 b/.ci/install-opencl.ps1 index 7e335fe13aa4..e48f24e4bf05 100644 --- a/.ci/install-opencl.ps1 +++ b/.ci/install-opencl.ps1 @@ -7,12 +7,12 @@ $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows dow Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" if (Test-Path "$installer") { - Write-Output "Successfully downloaded 
OpenCL platform installer" + Write-Output "Successfully downloaded OpenCL platform installer" } else { - Write-Output "Unable to download OpenCL platform installer" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 + Write-Output "Unable to download OpenCL platform installer" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } # Install OpenCL platform from installer executable @@ -21,14 +21,14 @@ Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/q $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors if ($property -eq $null) { - Write-Output "Unable to install OpenCL CPU platform" - Write-Output "OpenCL installation log:" - Get-Content "opencl.log" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - exit 1 + Write-Output "Unable to install OpenCL CPU platform" + Write-Output "OpenCL installation log:" + Get-Content "opencl.log" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + exit 1 } else { - Write-Output "Successfully installed OpenCL CPU platform" - Write-Output "Current OpenCL drivers:" - Write-Output $property + Write-Output "Successfully installed OpenCL CPU platform" + Write-Output "Current OpenCL drivers:" + Write-Output $property } diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 57055db1a69f..1dff55c2a9aa 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -75,22 +75,22 @@ Remove-Item C:\rtools43 -Force -Recurse -ErrorAction Ignore # * some paths and file names are different on R4.0 $env:R_MAJOR_VERSION = $env:R_VERSION.split('.')[0] if ($env:R_MAJOR_VERSION -eq "3") { - # Rtools 3.x has to be installed at C:\Rtools\ - # * https://stackoverflow.com/a/46619260/3986677 - $RTOOLS_INSTALL_PATH = "C:\Rtools" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" - $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" - $env:R_WINDOWS_VERSION = "3.6.3" + # Rtools 3.x has to be installed at C:\Rtools\ + # * https://stackoverflow.com/a/46619260/3986677 + $RTOOLS_INSTALL_PATH = "C:\Rtools" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\mingw_64\bin" + $env:RTOOLS_EXE_FILE = "rtools35-x86_64.exe" + $env:R_WINDOWS_VERSION = "3.6.3" } elseif ($env:R_MAJOR_VERSION -eq "4") { - $RTOOLS_INSTALL_PATH = "C:\rtools43" - $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" - $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" - $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" - $env:R_WINDOWS_VERSION = "4.3.1" + $RTOOLS_INSTALL_PATH = "C:\rtools43" + $env:RTOOLS_BIN = "$RTOOLS_INSTALL_PATH\usr\bin" + $env:RTOOLS_MINGW_BIN = "$RTOOLS_INSTALL_PATH\x86_64-w64-mingw32.static.posix\bin" + $env:RTOOLS_EXE_FILE = "rtools43-5550-5548.exe" + $env:R_WINDOWS_VERSION = "4.3.1" } else { - Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" - Assert-Output $false + Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION" + Assert-Output $false } $env:CMAKE_VERSION = "3.30.0" @@ -99,9 +99,9 @@ $env:R_LIBS = "$env:R_LIB_PATH" $env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation" $env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH if ([version]$env:R_VERSION -lt [version]"4.0") { - $env:CRAN_MIRROR = "https://cran-archive.r-project.org" + $env:CRAN_MIRROR = 
"https://cran-archive.r-project.org" } else { - $env:CRAN_MIRROR = "https://cran.rstudio.com" + $env:CRAN_MIRROR = "https://cran.rstudio.com" } $env:MIKTEX_EXCEPTION_PATH = "$env:TEMP\miktex" @@ -112,8 +112,8 @@ if ($env:R_BUILD_TYPE -ne "cran") { } if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" - $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" + $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" + $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" } cd $env:BUILD_SOURCESDIRECTORY @@ -152,158 +152,158 @@ Write-Output "Building R-package" # R CMD check is not used for MSVC builds if ($env:COMPILER -ne "MSVC") { - $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" - $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" + $PKG_FILE_NAME = "lightgbm_$env:LGB_VER.tar.gz" + $LOG_FILE_NAME = "lightgbm.Rcheck/00check.log" - if ($env:R_BUILD_TYPE -eq "cmake") { - if ($env:TOOLCHAIN -eq "MINGW") { - Write-Output "Telling R to use MinGW" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSYS") { - Write-Output "Telling R to use MSYS" - $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" - } elseif ($env:TOOLCHAIN -eq "MSVC") { - $env:BUILD_R_FLAGS = "'--skip-install'" - } else { - Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" - Assert-Output $false - } - Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? - } elseif ($env:R_BUILD_TYPE -eq "cran") { - # NOTE: gzip and tar are needed to create a CRAN package on Windows, but - # some flavors of tar.exe can fail in some settings on Windows. - # Putting the msys64 utilities at the beginning of PATH temporarily to be - # sure they're used for that purpose. - if ($env:R_MAJOR_VERSION -eq "3") { - $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + if ($env:R_BUILD_TYPE -eq "cmake") { + if ($env:TOOLCHAIN -eq "MINGW") { + Write-Output "Telling R to use MinGW" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-mingw', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSYS") { + Write-Output "Telling R to use MSYS" + $env:BUILD_R_FLAGS = "c('--skip-install', '--use-msys2', '-j4')" + } elseif ($env:TOOLCHAIN -eq "MSVC") { + $env:BUILD_R_FLAGS = "'--skip-install'" + } else { + Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" + Assert-Output $false + } + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? + } elseif ($env:R_BUILD_TYPE -eq "cran") { + # NOTE: gzip and tar are needed to create a CRAN package on Windows, but + # some flavors of tar.exe can fail in some settings on Windows. + # Putting the msys64 utilities at the beginning of PATH temporarily to be + # sure they're used for that purpose. + if ($env:R_MAJOR_VERSION -eq "3") { + $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + } + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? + Remove-From-Path ".*msys64.*" + # Test CRAN source .tar.gz in a directory that is not this repo or below it. + # When people install.packages('lightgbm'), they won't have the LightGBM + # git repo around. 
This is to protect against the use of relative paths + # like ../../CMakeLists.txt that would only work if you are in the repoo + $R_CMD_CHECK_DIR = "tmp-r-cmd-check" + New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null + Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null + cd "C:\$R_CMD_CHECK_DIR\" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? - Remove-From-Path ".*msys64.*" - # Test CRAN source .tar.gz in a directory that is not this repo or below it. - # When people install.packages('lightgbm'), they won't have the LightGBM - # git repo around. This is to protect against the use of relative paths - # like ../../CMakeLists.txt that would only work if you are in the repoo - $R_CMD_CHECK_DIR = "tmp-r-cmd-check" - New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null - Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null - cd "C:\$R_CMD_CHECK_DIR\" - } - Write-Output "Running R CMD check" - if ($env:R_BUILD_TYPE -eq "cran") { - # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) - $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } else { - $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" - } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + Write-Output "Running R CMD check" + if ($env:R_BUILD_TYPE -eq "cran") { + # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) + $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" + } else { + $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" + } + Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? - Write-Output "R CMD check build logs:" - $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "R CMD check build logs:" + $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Assert-Output $check_succeeded + Assert-Output $check_succeeded - Write-Output "Looking for issues with R CMD check results" - if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { - echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" - Assert-Output $False - } + Write-Output "Looking for issues with R CMD check results" + if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { + echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" + Assert-Output $False + } } else { - $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" - Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? 
- Write-Output "----- build and install logs -----" - Get-Content -Path "$INSTALL_LOG_FILE_NAME" - Write-Output "----- end of build and install logs -----" - Assert-Output $install_succeeded - # some errors are not raised above, but can be found in the logs - if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { - echo "ERRORs have been found installing lightgbm" - Assert-Output $False - } + $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" + Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? + Write-Output "----- build and install logs -----" + Get-Content -Path "$INSTALL_LOG_FILE_NAME" + Write-Output "----- end of build and install logs -----" + Assert-Output $install_succeeded + # some errors are not raised above, but can be found in the logs + if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { + echo "ERRORs have been found installing lightgbm" + Assert-Output $False + } } # Checking that the correct R version was used if ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" + $checks_cnt = $checks.Matches.length } else { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + $checks_cnt = $checks.Matches.length } if ($checks_cnt -eq 0) { - Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." - Assert-Output $False + Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs." + Assert-Output $False } # Checking that we actually got the expected compiler. The R-package has some logic # to fail back to MinGW if MSVC fails, but for CI builds we need to check that the correct # compiler was used. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong compiler was used. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Check for working CXX compiler.*$env:COMPILER" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong compiler was used. Check the build logs." + Assert-Output $False + } } # Checking that we got the right toolchain for MinGW. If using MinGW, both # MinGW and MSYS toolchains are supported if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" - if ($checks.Matches.length -eq 0) { - Write-Output "The wrong toolchain was used. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN" + if ($checks.Matches.length -eq 0) { + Write-Output "The wrong toolchain was used. Check the build logs." + Assert-Output $False + } } # Checking that MM_PREFETCH preprocessor definition is actually used in CI builds. 
if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_PREFETCH work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_PREFETCH - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." - Assert-Output $False + Write-Output "MM_PREFETCH preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that MM_MALLOC preprocessor definition is actually used in CI builds. if ($env:R_BUILD_TYPE -eq "cran") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "checking whether MM_MALLOC work.*yes" + $checks_cnt = $checks.Matches.length } elseif ($env:TOOLCHAIN -ne "MSVC") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" - $checks_cnt = $checks.Matches.length + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Performing Test MM_MALLOC - Success" + $checks_cnt = $checks.Matches.length } else { - $checks_cnt = 1 + $checks_cnt = 1 } if ($checks_cnt -eq 0) { - Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." - Assert-Output $False + Write-Output "MM_MALLOC preprocessor definition wasn't used. Check the build logs." + Assert-Output $False } # Checking that OpenMP is actually used in CMake builds. if ($env:R_BUILD_TYPE -eq "cmake") { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" - if ($checks.Matches.length -eq 0) { - Write-Output "OpenMP wasn't found. Check the build logs." - Assert-Output $False - } + $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern ".*Found OpenMP: TRUE.*" + if ($checks.Matches.length -eq 0) { + Write-Output "OpenMP wasn't found. Check the build logs." + Assert-Output $False + } } if ($env:COMPILER -eq "MSVC") { - Write-Output "Running tests with testthat.R" - cd R-package/tests - # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, - # because something about the interaction between Invoke-R-Code-Redirect-Stderr - # and testthat results in failing tests not exiting with a non-0 exit code. - Rscript.exe --vanilla "testthat.R" ; Assert-Output $? + Write-Output "Running tests with testthat.R" + cd R-package/tests + # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, + # because something about the interaction between Invoke-R-Code-Redirect-Stderr + # and testthat results in failing tests not exiting with a non-0 exit code. + Rscript.exe --vanilla "testthat.R" ; Assert-Output $? 
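All of the guards above share one log-grepping idiom: run a command, capture its log, then count Select-String matches for an expected marker. A minimal self-contained sketch of that idiom (the log file name and its contents below are invented for illustration and are not part of the CI scripts):

    # Write a tiny fake build log (illustrative content only).
    $DEMO_LOG_FILE_NAME = "demo-build.log"
    Set-Content -Path "$DEMO_LOG_FILE_NAME" -Value "-- Found OpenMP: TRUE (found version 4.5)"

    # Select-String returns one MatchInfo object per matching line; counting
    # .Matches reveals whether the expected marker ever appeared in the log.
    $checks = Select-String -Path "$DEMO_LOG_FILE_NAME" -Pattern "Found OpenMP: TRUE"
    if ($checks.Matches.length -eq 0) {
        Write-Output "Expected marker not found. Check the build logs."
        exit 1
    }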
} Write-Output "No issues were found checking the R-package" diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index 87c214856212..f3015ae7d180 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -17,41 +17,41 @@ Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore [Void][System.IO.Directory]::CreateDirectory($env:TMPDIR) if ($env:TASK -eq "r-package") { - & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? - Exit 0 + & .\.ci\test-r-package-windows.ps1 ; Assert-Output $? + Exit 0 } if ($env:TASK -eq "cpp-tests") { - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 - cmake --build build --target testlightgbm --config Debug ; Assert-Output $? - .\Debug\testlightgbm.exe ; Assert-Output $? - Exit 0 + cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_DEBUG=ON -A x64 + cmake --build build --target testlightgbm --config Debug ; Assert-Output $? + .\Debug\testlightgbm.exe ; Assert-Output $? + Exit 0 } if ($env:TASK -eq "swig") { - $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere - $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" - Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? - $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" - $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH - $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" - cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? - Write-Output "CMake build logs:" - Get-Content -Path "$BuildLogFileName" - Assert-Output $build_succeeded - $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" - $checks_cnt = $checks.Matches.length - if ($checks_cnt -eq 0) { - Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." - Assert-Output $False - } - cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? - if ($env:AZURE -eq "true") { - cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? - } - Exit 0 + $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed + Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" + Add-Type -AssemblyName System.IO.Compression.FileSystem + [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? + $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" + $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH + $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" + cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? 
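Two PowerShell details in the block above are easy to miss: `*>` redirects every output stream (success, error, warning, and so on) to a single destination, and `$?` reports the success of only the most recent command, so it has to be captured on the same line before anything else runs. A small standalone sketch, assuming only that cmake.exe is on PATH:

    # '*>' sends all streams of the command to one log file; '$?' is read
    # immediately afterwards, before any other command can overwrite it.
    cmake --version *> "demo_build.log" ; $build_succeeded = $?

    # Print the captured log first, then fail if needed, so the log is
    # always visible in CI output even for failed builds.
    Write-Output "captured log:"
    Get-Content -Path "demo_build.log"
    if (-not $build_succeeded) {
        exit 1
    }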
+ Write-Output "CMake build logs:" + Get-Content -Path "$BuildLogFileName" + Assert-Output $build_succeeded + $checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe" + $checks_cnt = $checks.Matches.length + if ($checks_cnt -eq 0) { + Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs." + Assert-Output $False + } + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + if ($env:AZURE -eq "true") { + cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Assert-Output $? + } + Exit 0 } # setup for Python @@ -61,82 +61,82 @@ conda config --set always_yes yes --set changeps1 no conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]" if ($env:PYTHON_VERSION -eq "3.7") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt" } elseif ($env:PYTHON_VERSION -eq "3.8") { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt" } else { - $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" + $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } conda create ` - -y ` - -n $env:CONDA_ENV ` - --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? + -y ` + -n $env:CONDA_ENV ` + --file $env:CONDA_REQUIREMENT_FILE ` + "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? if ($env:TASK -ne "bdist") { - conda activate $env:CONDA_ENV + conda activate $env:CONDA_ENV } cd $env:BUILD_SOURCESDIRECTORY if ($env:TASK -eq "regular") { - cmake -B build -S . -A x64 ; Assert-Output $? - cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? - sh ./build-python.sh install --precompile ; Assert-Output $? - cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY + cmake -B build -S . -A x64 ; Assert-Output $? + cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? + sh ./build-python.sh install --precompile ; Assert-Output $? + cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY + cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif ($env:TASK -eq "sdist") { - sh ./build-python.sh sdist ; Assert-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? + sh ./build-python.sh sdist ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? } elseif ($env:TASK -eq "bdist") { - # Import the Chocolatey profile module so that the RefreshEnv command - # invoked below properly updates the current PowerShell session environment. - $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Import-Module "$module" ; Assert-Output $? - RefreshEnv + # Import the Chocolatey profile module so that the RefreshEnv command + # invoked below properly updates the current PowerShell session environment. + $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + Import-Module "$module" ; Assert-Output $? 
+ RefreshEnv - Write-Output "Current OpenCL drivers:" - Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors + Write-Output "Current OpenCL drivers:" + Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors - conda activate $env:CONDA_ENV - sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? - sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? - cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY + conda activate $env:CONDA_ENV + sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? + sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? + cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? + cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { - if ($env:COMPILER -eq "MINGW") { - sh ./build-python.sh install --mingw ; Assert-Output $? - } else { - sh ./build-python.sh install; Assert-Output $? - } + if ($env:COMPILER -eq "MINGW") { + sh ./build-python.sh install --mingw ; Assert-Output $? + } else { + sh ./build-python.sh install; Assert-Output $? + } } if (($env:TASK -eq "sdist") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - # cannot test C API with "sdist" task - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" + # cannot test C API with "sdist" task + $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" } else { - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" + $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" } if ($env:TASK -eq "bdist") { - # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py - $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" + # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py + $env:LIGHTGBM_TEST_DUAL_CPU_GPU = "1" } pytest $tests ; Assert-Output $? if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide - @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode - conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" - foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file ; Assert-Output $? - } # run all examples - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks - (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? 
# run all notebooks + cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide + @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" + (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode + conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" + foreach ($file in @(Get-ChildItem *.py)) { + @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file + python $file ; Assert-Output $? + } # run all examples + cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks + (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks } diff --git a/.ci/test.sh b/.ci/test.sh index 9b3e1ee3938d..f959af16ccf3 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -100,7 +100,7 @@ fi if [[ $TASK == "lint" ]]; then pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" echo "Linting PowerShell code" - pwsh -file "./.ci/lint-powershell.ps1" || exit 0 + pwsh -file "./.ci/lint-powershell.ps1" || : conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'cmakelint>=1.4.3' \ From 13f2e92bb0ac64f94d9b5016a33b5c34d2134204 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 3 Nov 2024 19:32:25 +0300 Subject: [PATCH 08/27] [ci] check JavaScript code with `biome` tool (#6711) * lint js code * hotfix * Update .editorconfig Co-authored-by: James Lamb --------- Co-authored-by: James Lamb --- .ci/lint-js.sh | 5 ++ .ci/{lint-python.sh => lint-python-bash.sh} | 0 .ci/test.sh | 7 +- .editorconfig | 11 ++- biome.json | 21 ++++++ docs/_static/js/script.js | 69 +++++++++++-------- .../binary_classification/forced_splits.json | 2 +- examples/regression/forced_bins.json | 4 +- examples/regression/forced_bins2.json | 2 +- python-package/README.rst | 2 +- 10 files changed, 81 insertions(+), 42 deletions(-) create mode 100644 .ci/lint-js.sh rename .ci/{lint-python.sh => lint-python-bash.sh} (100%) create mode 100644 biome.json diff --git a/.ci/lint-js.sh b/.ci/lint-js.sh new file mode 100644 index 000000000000..534f251620e2 --- /dev/null +++ b/.ci/lint-js.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e -E -u -o pipefail + +biome ci --config-path=./biome.json --diagnostic-level=info --error-on-warnings ./ diff --git a/.ci/lint-python.sh b/.ci/lint-python-bash.sh similarity index 100% rename from .ci/lint-python.sh rename to .ci/lint-python-bash.sh diff --git a/.ci/test.sh b/.ci/test.sh index f959af16ccf3..45ee65629744 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -103,6 +103,7 @@ if [[ $TASK == "lint" ]]; then pwsh -file "./.ci/lint-powershell.ps1" || : conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ + 'biome>=1.9.3' \ 'cmakelint>=1.4.3' \ 'cpplint>=1.6.0' \ 'matplotlib-base>=3.9.1' \ @@ -113,12 +114,14 @@ if [[ $TASK == "lint" ]]; then 'r-lintr>=3.1.2' # shellcheck disable=SC1091 source activate "${CONDA_ENV}" - echo "Linting Python code" - bash ./.ci/lint-python.sh || exit 1 + echo "Linting Python and bash code" + bash 
./.ci/lint-python-bash.sh || exit 1 echo "Linting R code" Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1 echo "Linting C++ code" bash ./.ci/lint-cpp.sh || exit 1 + echo "Linting JavaScript code" + bash ./.ci/lint-js.sh || exit 1 exit 0 fi diff --git a/.editorconfig b/.editorconfig index f4ae446b64bb..e7191b63c1d3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,22 +1,19 @@ root = true [*] -charset=utf-8 +charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true +end_of_line = lf indent_style = space indent_size = 2 -[*.{py,sh,js,ps1}] +[*.{py,sh,ps1,js,json}] indent_size = 4 -line_length = 120 +max_line_length = 120 skip = external_libs known_first_party = lightgbm -# Placeholder files -[{*.gitkeep,__init__.py}] -insert_final_newline = none - # Tabs matter for Makefile and .gitmodules [{makefile*,Makefile*,*.mk,*.mak,*.makefile,*.Makefile,GNUmakefile,BSDmakefile,make.bat,Makevars*,*.gitmodules}] indent_style = tab diff --git a/biome.json b/biome.json new file mode 100644 index 000000000000..5029d037189e --- /dev/null +++ b/biome.json @@ -0,0 +1,21 @@ +{ + "files": { + "ignore": [".mypy_cache/"] + }, + "formatter": { + "enabled": true, + "useEditorconfig": true + }, + "organizeImports": { + "enabled": true + }, + "linter": { + "enabled": true, + "rules": { + "all": true + } + }, + "javascript": { + "globals": ["$"] + } +} diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 3cfc90de887d..3f129501e06f 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -1,56 +1,69 @@ -$(function() { +$(() => { /* Use wider container for the page content */ - $('.wy-nav-content').each(function() { this.style.setProperty('max-width', 'none', 'important'); }); + $(".wy-nav-content").each(function () { + this.style.setProperty("max-width", "none", "important"); + }); /* List each class property item on a new line https://github.com/microsoft/LightGBM/issues/5073 */ - if(window.location.pathname.toLocaleLowerCase().indexOf('pythonapi') !== -1) { - $('.py.property').each(function() { this.style.setProperty('display', 'inline', 'important'); }); + if (window.location.pathname.toLocaleLowerCase().indexOf("pythonapi") !== -1) { + $(".py.property").each(function () { + this.style.setProperty("display", "inline", "important"); + }); } /* Collapse specified sections in the installation guide */ - if(window.location.pathname.toLocaleLowerCase().indexOf('installation-guide') !== -1) { - $('').appendTo('body'); - var collapsable = [ - '#build-threadless-version-not-recommended', - '#build-mpi-version', - '#build-gpu-version', - '#build-cuda-version', - '#build-java-wrapper', - '#build-c-unit-tests' + if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { + $( + '', + ).appendTo("body"); + const collapsable = [ + "#build-threadless-version-not-recommended", + "#build-mpi-version", + "#build-gpu-version", + "#build-cuda-version", + "#build-java-wrapper", + "#build-c-unit-tests", ]; - $.each(collapsable, function(_, val) { - var header = val + ' > :header:first'; - var content = val + ' :not(:header:first)'; - $(header).addClass('closed'); + $.each(collapsable, (_, val) => { + const header = `${val} > :header:first`; + const content = `${val} :not(:header:first)`; + $(header).addClass("closed"); $(content).hide(); - $(header).click(function() { - $(header).toggleClass('closed opened'); + $(header).click(() => { + $(header).toggleClass("closed opened"); $(content).slideToggle(0); }); }); /* Uncollapse parent sections 
when nested section is specified in the URL or before navigate to it from navbar */ function uncollapse(section) { - section.parents().each((_, val) => { $(val).children('.closed').click(); }); + section.parents().each((_, val) => { + $(val).children(".closed").click(); + }); } uncollapse($(window.location.hash)); - $('.wy-menu.wy-menu-vertical li a.reference.internal').click(function() { - uncollapse($($(this).attr('href'))); + $(".wy-menu.wy-menu-vertical li a.reference.internal").click(function () { + uncollapse($($(this).attr("href"))); }); /* Modify src and href attrs of artifacts badge */ function modifyBadge(src, href) { - $('img[alt="download artifacts"]').each(function() { + $('img[alt="download artifacts"]').each(function () { this.src = src; this.parentNode.href = href; }); } /* Initialize artifacts badge */ - modifyBadge('./_static/images/artifacts-fetching.svg', '#'); + modifyBadge("./_static/images/artifacts-fetching.svg", "#"); /* Fetch latest buildId and construct artifacts badge */ - $.getJSON('https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7', function(data) { - modifyBadge('./_static/images/artifacts-download.svg', - 'https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/' + data['value'][0]['id'] + '/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip'); - }); + $.getJSON( + "https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds?branchName=refs/heads/master&resultFilter=succeeded&queryOrder=finishTimeDescending&%24top=1&api-version=7.1-preview.7", + (data) => { + modifyBadge( + "./_static/images/artifacts-download.svg", + `https://dev.azure.com/lightgbm-ci/lightgbm-ci/_apis/build/builds/${data.value[0].id}/artifacts?artifactName=PackageAssets&api-version=7.1-preview.5&%24format=zip`, + ); + }, + ); } }); diff --git a/examples/binary_classification/forced_splits.json b/examples/binary_classification/forced_splits.json index 1ee410c9789e..b09391a87f49 100644 --- a/examples/binary_classification/forced_splits.json +++ b/examples/binary_classification/forced_splits.json @@ -1,6 +1,6 @@ { "feature": 25, - "threshold": 1.30, + "threshold": 1.3, "left": { "feature": 26, "threshold": 0.85 diff --git a/examples/regression/forced_bins.json b/examples/regression/forced_bins.json index 1ee0a49d727c..19722afbbb4b 100644 --- a/examples/regression/forced_bins.json +++ b/examples/regression/forced_bins.json @@ -1,10 +1,10 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.3, 0.35, 0.4 ] + "bin_upper_bound": [0.3, 0.35, 0.4] }, { "feature": 1, - "bin_upper_bound": [ -0.1, -0.15, -0.2 ] + "bin_upper_bound": [-0.1, -0.15, -0.2] } ] diff --git a/examples/regression/forced_bins2.json b/examples/regression/forced_bins2.json index f4dca0ccaf34..d6454f8a4ae9 100644 --- a/examples/regression/forced_bins2.json +++ b/examples/regression/forced_bins2.json @@ -1,6 +1,6 @@ [ { "feature": 0, - "bin_upper_bound": [ 0.19, 0.39, 0.59, 0.79 ] + "bin_upper_bound": [0.19, 0.39, 0.59, 0.79] } ] diff --git a/python-package/README.rst b/python-package/README.rst index 0e007e5ee7ec..face6bba6b74 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -286,7 +286,7 @@ To check that a contribution to the package matches its style expectations, run .. code:: sh - bash .ci/lint-python.sh + bash .ci/lint-python-bash.sh .. 
|License| image:: https://img.shields.io/github/license/microsoft/lightgbm.svg :target: https://github.com/microsoft/LightGBM/blob/master/LICENSE From e0071911c8327df9a031ba7e61e9a2c6cff43d76 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 3 Nov 2024 22:12:20 +0300 Subject: [PATCH 09/27] [ci] check PowerShell scripts with PSScriptAnalyzer (part 3) (#6710) Co-authored-by: James Lamb --- .ci/install-opencl.ps1 | 12 +++-- .ci/lint-powershell.ps1 | 2 +- .ci/test-r-package-windows.ps1 | 94 ++++++++++++++++++++++++++-------- .ci/test-windows.ps1 | 77 ++++++++++++++++++---------- .ci/test.sh | 2 +- 5 files changed, 134 insertions(+), 53 deletions(-) diff --git a/.ci/install-opencl.ps1 b/.ci/install-opencl.ps1 index e48f24e4bf05..b69ed575f0fb 100644 --- a/.ci/install-opencl.ps1 +++ b/.ci/install-opencl.ps1 @@ -4,7 +4,11 @@ $installer = "AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe" Write-Output "Downloading OpenCL platform installer" $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed -Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +$params = @{ + OutFile = "$installer" + Uri = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +} +Invoke-WebRequest @params if (Test-Path "$installer") { Write-Output "Successfully downloaded OpenCL platform installer" @@ -17,10 +21,12 @@ if (Test-Path "$installer") { # Install OpenCL platform from installer executable Write-Output "Running OpenCL installer" -Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait } +Invoke-Command -ScriptBlock { + Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait +} $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors -if ($property -eq $null) { +if ($null -eq $property) { Write-Output "Unable to install OpenCL CPU platform" Write-Output "OpenCL installation log:" Get-Content "opencl.log" diff --git a/.ci/lint-powershell.ps1 b/.ci/lint-powershell.ps1 index b2e045917ab6..332a6e040319 100644 --- a/.ci/lint-powershell.ps1 +++ b/.ci/lint-powershell.ps1 @@ -53,4 +53,4 @@ $settings = @{ } } -Invoke-ScriptAnalyzer -Path "$env:BUILD_DIRECTORY/.ci" -Recurse -EnableExit -Settings $settings +Invoke-ScriptAnalyzer -Path ./ -Recurse -EnableExit -Settings $settings diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 1dff55c2a9aa..1ce698a49c72 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -97,7 +97,13 @@ $env:CMAKE_VERSION = "3.30.0" $env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/' $env:R_LIBS = "$env:R_LIB_PATH" $env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation" -$env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH +$env:PATH = @( + "$env:RTOOLS_BIN", + "$env:RTOOLS_MINGW_BIN", + "$env:R_LIB_PATH/R/bin/x64", + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin", + "$env:PATH" +) -join ";" if ([version]$env:R_VERSION -lt [version]"4.0") { $env:CRAN_MIRROR = "https://cran-archive.r-project.org" } else { @@ -116,24 +122,50 @@ if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location 
"$env:BUILD_SOURCESDIRECTORY" tzutil /s "GMT Standard Time" -[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH) -[Void][System.IO.Directory]::CreateDirectory($env:CMAKE_PATH) +[Void][System.IO.Directory]::CreateDirectory("$env:R_LIB_PATH") +[Void][System.IO.Directory]::CreateDirectory("$env:CMAKE_PATH") # download R, RTools and CMake Write-Output "Downloading R, Rtools and CMake" -Get-File-With-Tenacity -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe" -Get-File-With-Tenacity -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe" -Get-File-With-Tenacity -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip" +$params = @{ + url = "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" + destfile = "R-win.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" + destfile = "Rtools.exe" +} +Get-File-With-Tenacity @params + +$params = @{ + url = "https://github.com/Kitware/CMake/releases/download/v{0}/cmake-{0}-windows-x86_64.zip" -f $env:CMAKE_VERSION + destfile = "$env:CMAKE_PATH/cmake.zip" +} +Get-File-With-Tenacity @params # Install R Write-Output "Installing R" -Start-Process -FilePath R-win.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" ; Assert-Output $? +$params = @{ + FilePath = "R-win.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /DIR=$env:R_LIB_PATH/R /COMPONENTS=main,x64,i386" +} +Start-Process @params ; Assert-Output $? Write-Output "Done installing R" Write-Output "Installing Rtools" -Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Assert-Output $? +$params = @{ + FilePath = "Rtools.exe" + NoNewWindow = $true + Wait = $true + ArgumentList = "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" +} +Start-Process @params; Assert-Output $? Write-Output "Done installing Rtools" Write-Output "Installing CMake" @@ -144,8 +176,16 @@ Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" Write-Output "Installing dependencies" -$packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')" -Invoke-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Assert-Output $? +$packages = -join @( + "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ", + "dependencies = c('Imports', 'Depends', 'LinkingTo')" +) +$params = -join @( + "options(install.packages.check.source = 'no'); ", + "install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', ", + "lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" +) +Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? 
Write-Output "Building R-package" @@ -168,16 +208,21 @@ if ($env:COMPILER -ne "MSVC") { Write-Output "[ERROR] Unrecognized toolchain: $env:TOOLCHAIN" Assert-Output $false } - Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')"; Assert-Output $? + Invoke-R-Code-Redirect-Stderr "commandArgs <- function(...){$env:BUILD_R_FLAGS}; source('build_r.R')" + Assert-Output $? } elseif ($env:R_BUILD_TYPE -eq "cran") { # NOTE: gzip and tar are needed to create a CRAN package on Windows, but # some flavors of tar.exe can fail in some settings on Windows. # Putting the msys64 utilities at the beginning of PATH temporarily to be # sure they're used for that purpose. if ($env:R_MAJOR_VERSION -eq "3") { - $env:PATH = "C:\msys64\usr\bin;" + $env:PATH + $env:PATH = @("C:\msys64\usr\bin", "$env:PATH") -join ";" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; Assert-Output $? + $params = -join @( + "result <- processx::run(command = 'sh', args = 'build-cran-package.sh', ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; Assert-Output $? Remove-From-Path ".*msys64.*" # Test CRAN source .tar.gz in a directory that is not this repo or below it. # When people install.packages('lightgbm'), they won't have the LightGBM @@ -186,7 +231,7 @@ if ($env:COMPILER -ne "MSVC") { $R_CMD_CHECK_DIR = "tmp-r-cmd-check" New-Item -Path "C:\" -Name $R_CMD_CHECK_DIR -ItemType "directory" > $null Move-Item -Path "$PKG_FILE_NAME" -Destination "C:\$R_CMD_CHECK_DIR\" > $null - cd "C:\$R_CMD_CHECK_DIR\" + Set-Location "C:\$R_CMD_CHECK_DIR\" } Write-Output "Running R CMD check" @@ -196,7 +241,11 @@ if ($env:COMPILER -ne "MSVC") { } else { $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } - Invoke-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $? + $params = -join ( + "result <- processx::run(command = 'R.exe', args = $check_args, ", + "echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" + ) + Invoke-R-Code-Redirect-Stderr $params ; $check_succeeded = $? Write-Output "R CMD check build logs:" $INSTALL_LOG_FILE_NAME = "lightgbm.Rcheck\00install.out" @@ -206,10 +255,9 @@ if ($env:COMPILER -ne "MSVC") { Write-Output "Looking for issues with R CMD check results" if (Get-Content "$LOG_FILE_NAME" | Select-String -Pattern "NOTE|WARNING|ERROR" -CaseSensitive -Quiet) { - echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" + Write-Output "NOTEs, WARNINGs, or ERRORs have been found by R CMD check" Assert-Output $False } - } else { $INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt" Invoke-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $? 
@@ -219,7 +267,7 @@ if ($env:COMPILER -ne "MSVC") { Assert-Output $install_succeeded # some errors are not raised above, but can be found in the logs if (Get-Content "$INSTALL_LOG_FILE_NAME" | Select-String -Pattern "ERROR" -CaseSensitive -Quiet) { - echo "ERRORs have been found installing lightgbm" + Write-Output "ERRORs have been found installing lightgbm" Assert-Output $False } } @@ -229,7 +277,11 @@ if ($env:TOOLCHAIN -ne "MSVC") { $checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION" $checks_cnt = $checks.Matches.length } else { - $checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + $checksParams = @{ + Path = "${INSTALL_LOG_FILE_NAME}" + Pattern = "R version passed into FindLibR.* $env:R_WINDOWS_VERSION" + } + $checks = Select-String @checksParams $checks_cnt = $checks.Matches.length } if ($checks_cnt -eq 0) { @@ -299,7 +351,7 @@ if ($env:R_BUILD_TYPE -eq "cmake") { if ($env:COMPILER -eq "MSVC") { Write-Output "Running tests with testthat.R" - cd R-package/tests + Set-Location R-package/tests # NOTE: using Rscript.exe intentionally here, instead of Invoke-R-Code-Redirect-Stderr, # because something about the interaction between Invoke-R-Code-Redirect-Stderr # and testthat results in failing tests not exiting with a non-0 exit code. diff --git a/.ci/test-windows.ps1 b/.ci/test-windows.ps1 index f3015ae7d180..264c13961aff 100644 --- a/.ci/test-windows.ps1 +++ b/.ci/test-windows.ps1 @@ -31,11 +31,19 @@ if ($env:TASK -eq "cpp-tests") { if ($env:TASK -eq "swig") { $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed - Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl" + $params = @{ + Uri = "https://sourceforge.net/projects/swig/files/latest/download" + OutFile = "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip" + UserAgent = "curl" + } + Invoke-WebRequest @params Add-Type -AssemblyName System.IO.Compression.FileSystem - [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Assert-Output $? + [System.IO.Compression.ZipFile]::ExtractToDirectory( + "$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", + "$env:BUILD_SOURCESDIRECTORY/swig" + ) ; Assert-Output $? $SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig" - $env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH + $env:PATH = @("$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder", "$env:PATH") -join ";" $BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log" cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $? Write-Output "CMake build logs:" @@ -68,30 +76,30 @@ if ($env:PYTHON_VERSION -eq "3.7") { $env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt" } -conda create ` - -y ` - -n $env:CONDA_ENV ` - --file $env:CONDA_REQUIREMENT_FILE ` - "python=$env:PYTHON_VERSION[build=*cpython]" ; Assert-Output $? +$condaParams = @( + "-y", + "-n", "$env:CONDA_ENV", + "--file", "$env:CONDA_REQUIREMENT_FILE", + "python=$env:PYTHON_VERSION[build=*cpython]" +) +conda create @condaParams ; Assert-Output $? 
if ($env:TASK -ne "bdist") { conda activate $env:CONDA_ENV } -cd $env:BUILD_SOURCESDIRECTORY +Set-Location "$env:BUILD_SOURCESDIRECTORY" if ($env:TASK -eq "regular") { cmake -B build -S . -A x64 ; Assert-Output $? cmake --build build --target ALL_BUILD --config Release ; Assert-Output $? sh ./build-python.sh install --precompile ; Assert-Output $? - cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY - cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY -} -elseif ($env:TASK -eq "sdist") { + cp ./Release/lib_lightgbm.dll "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" + cp ./Release/lightgbm.exe "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" +} elseif ($env:TASK -eq "sdist") { sh ./build-python.sh sdist ; Assert-Output $? sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? -} -elseif ($env:TASK -eq "bdist") { + Set-Location dist; pip install @(Get-ChildItem *.gz) -v ; Assert-Output $? +} elseif ($env:TASK -eq "bdist") { # Import the Chocolatey profile module so that the RefreshEnv command # invoked below properly updates the current PowerShell session environment. $module = "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" @@ -104,8 +112,8 @@ elseif ($env:TASK -eq "bdist") { conda activate $env:CONDA_ENV sh "build-python.sh" bdist_wheel --integrated-opencl ; Assert-Output $? sh ./.ci/check-python-dists.sh ./dist ; Assert-Output $? - cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? - cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY + Set-Location dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Assert-Output $? + cp @(Get-ChildItem *py3-none-win_amd64.whl) "$env:BUILD_ARTIFACTSTAGINGDIRECTORY" } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { if ($env:COMPILER -eq "MINGW") { sh ./build-python.sh install --mingw ; Assert-Output $? @@ -116,9 +124,9 @@ elseif ($env:TASK -eq "bdist") { if (($env:TASK -eq "sdist") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { # cannot test C API with "sdist" task - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests/python_package_test" + $tests = "$env:BUILD_SOURCESDIRECTORY/tests/python_package_test" } else { - $tests = $env:BUILD_SOURCESDIRECTORY + "/tests" + $tests = "$env:BUILD_SOURCESDIRECTORY/tests" } if ($env:TASK -eq "bdist") { # Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py @@ -128,15 +136,30 @@ if ($env:TASK -eq "bdist") { pytest $tests ; Assert-Output $? 
if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python"))) { - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide" @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode + # Prevent interactive window mode + (Get-Content "plot_example.py").replace( + 'graph.render(view=True)', + 'graph.render(view=False)' + ) | Set-Content "plot_example.py" conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2" + # Run all examples foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file + @( + "import sys, warnings", + -join @( + "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: ", + "sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))" + ) + ) + (Get-Content $file) | Set-Content $file python $file ; Assert-Output $? - } # run all examples - cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks - (Get-Content "interactive_plot_example.ipynb").replace('INTERACTIVE = False', 'assert False, \"Interactive mode disabled\"') | Set-Content "interactive_plot_example.ipynb" - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? # run all notebooks + } + # Run all notebooks + Set-Location "$env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks" + (Get-Content "interactive_plot_example.ipynb").replace( + 'INTERACTIVE = False', + 'assert False, \"Interactive mode disabled\"' + ) | Set-Content "interactive_plot_example.ipynb" + jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Assert-Output $? 
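The example-patching just above is a plain read-transform-write round trip: `Get-Content` loads a file as an array of lines, the array is modified in memory, and `Set-Content` writes it back. A self-contained sketch on a throwaway file (file name and contents are invented for illustration):

    # Create a throwaway script to edit.
    Set-Content -Path "demo.py" -Value "graph.render(view=True)"

    # Rewrite one call site on every line, then write the file back.
    (Get-Content "demo.py").replace(
        'graph.render(view=True)',
        'graph.render(view=False)'
    ) | Set-Content "demo.py"

    # Prepend lines by concatenating an array with the file's contents.
    @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "demo.py") | Set-Content "demo.py"
    Get-Content "demo.py"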
} diff --git a/.ci/test.sh b/.ci/test.sh index 45ee65629744..cc8831f94c09 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -100,7 +100,7 @@ fi if [[ $TASK == "lint" ]]; then pwsh -command "Install-Module -Name PSScriptAnalyzer -Scope CurrentUser -SkipPublisherCheck" echo "Linting PowerShell code" - pwsh -file "./.ci/lint-powershell.ps1" || : + pwsh -file ./.ci/lint-powershell.ps1 || exit 1 conda create -q -y -n "${CONDA_ENV}" \ "${CONDA_PYTHON_REQUIREMENT}" \ 'biome>=1.9.3' \ From 5151fe85f08e5dccff7d48242dddace51f9c8ede Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Nov 2024 06:04:38 -0600 Subject: [PATCH 10/27] [ci] [R-package] re-enable 'rchk' checks (#6713) * intentionally miss an unprotect() * re-enable rchk * grep for errors * restore all CI --- .github/workflows/r_package.yml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 1758583ad8e4..8811f53b61c0 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -274,6 +274,7 @@ jobs: - clang19 - gcc14 - intel + - rchk runs-on: ubuntu-latest container: ghcr.io/r-hub/containers/${{ matrix.image }}:latest steps: @@ -311,8 +312,32 @@ jobs: - name: Install packages and run tests shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh + + # 'rchk' isn't run through 'R CMD check', use the approach documented at + # https://r-hub.github.io/containers/local.html + if [[ "${{ matrix.image }}" =~ "rchk" ]]; then + r-check "$(pwd)" \ + | tee ./rchk-logs.txt 2>&1 + + # the '-v' exceptions below are from R/rchk itself and not LightGBM: + # https://github.com/kalibera/rchk/issues/22#issuecomment-656036156 + if grep -E '\[PB\]|ERROR' ./rchk-logs.txt \ + | grep -v 'too many states' \ + > /dev/null; \ + then + echo "rchk found issues" + exit 1 + else + echo "rchk did not find any issues" + exit 0 + fi + fi + + # 'testthat' is not needed by 'rchk', so avoid installing it until here + Rscript -e "install.packages('testthat', repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + if [[ "${{ matrix.image }}" =~ "clang" ]]; then # allowing the following NOTEs (produced by default in the clang images): # From 4531ff548d43a8c7a35477b379f840e587cc2719 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 14 Nov 2024 20:35:16 -0600 Subject: [PATCH 11/27] [python-package] adapt to scikit-learn 1.6 testing changes, pin more packages in R 3.6 CI jobs (#6718) --- .ci/install-old-r-packages.R | 79 +++++++++++++++++++++++ .ci/test-r-package.sh | 4 +- python-package/lightgbm/compat.py | 10 +++ python-package/lightgbm/sklearn.py | 13 +++- tests/python_package_test/test_sklearn.py | 38 +++++++++-- 5 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 .ci/install-old-r-packages.R diff --git a/.ci/install-old-r-packages.R b/.ci/install-old-r-packages.R new file mode 100644 index 000000000000..e402c4d5ca12 --- /dev/null +++ b/.ci/install-old-r-packages.R @@ -0,0 +1,79 @@ +# [description] +# +# Installs a pinned set of packages that worked together +# as of the last R 3.6 release. 
+# + +.install_packages <- function(packages) { + install.packages( # nolint: undesirable_function + pkgs = paste( # nolint: paste + "https://cran.r-project.org/src/contrib/Archive" + , packages + , sep = "/" + ) + , dependencies = FALSE + , lib = Sys.getenv("R_LIBS") + , repos = NULL + ) +} + +# when confronted with a bunch of URLs like this, install.packages() sometimes +# struggles to determine install order... so install packages in batches here, +# starting from the root of the dependency graph and working up + +# there was only a single release of {praise}, so there is no contrib/Archive URL for it +install.packages( # nolint: undesirable_function + pkgs = "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz" + , dependencies = FALSE + , lib = Sys.getenv("R_LIBS") + , repos = NULL +) + +.install_packages(c( + "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path + , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path + , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path + , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path + , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path + , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path + , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path + , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path + , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path + , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path + , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path + , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path + , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path + , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path + , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path + , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path + , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path + , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path + , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path + , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path + , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path + , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path + , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path + , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path + , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path +)) + +.install_packages(c( + "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path + , "testthat/testthat_3.2.1.tar.gz" # nolint: non_portable_path +)) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index ae205213d787..a076fab0186c 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -108,10 +108,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6 +# fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6 # "Warning: dependency ‘lattice’ is not available" if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 
'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')" + Rscript --vanilla ./.ci/install-old-r-packages.R else # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. # This should be unnecessary on R >=4.4.0 diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 96dee6522572..0b9444b0ecbf 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -14,6 +14,14 @@ from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import assert_all_finite, check_array, check_X_y + # sklearn.utils Tags types can be imported unconditionally once + # lightgbm's minimum scikit-learn version is 1.6 or higher + try: + from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags + from sklearn.utils import RegressorTags as _sklearn_RegressorTags + except ImportError: + _sklearn_ClassifierTags = None + _sklearn_RegressorTags = None try: from sklearn.exceptions import NotFittedError from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold @@ -140,6 +148,8 @@ class _LGBMRegressorBase: # type: ignore _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None _LGBMValidateData = None + _sklearn_ClassifierTags = None + _sklearn_RegressorTags = None _sklearn_version = None # additional scikit-learn imports only for type hints diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index c4d1200e99e4..614e3c3cbe7f 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -40,6 +40,8 @@ _LGBMModelBase, _LGBMRegressorBase, _LGBMValidateData, + _sklearn_ClassifierTags, + _sklearn_RegressorTags, _sklearn_version, dt_DataTable, pd_DataFrame, @@ -703,7 +705,6 @@ def _update_sklearn_tags_from_dict( tags.input_tags.allow_nan = tags_dict["allow_nan"] tags.input_tags.sparse = "sparse" in tags_dict["X_types"] tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"] - tags._xfail_checks = tags_dict["_xfail_checks"] return tags def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: @@ -1291,7 +1292,10 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + tags = LGBMModel.__sklearn_tags__(self) + tags.estimator_type = "regressor" + tags.regressor_tags = _sklearn_RegressorTags(multi_label=False) + return tags def fit( # type: ignore[override] self, @@ -1350,7 +1354,10 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - return LGBMModel.__sklearn_tags__(self) + tags = LGBMModel.__sklearn_tags__(self) + tags.estimator_type = "classifier" + tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False) + return tags def fit( # type: ignore[override] self, diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 6eca66ff20d3..d187e9df5a9f 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -17,11 +17,18 @@ from sklearn.metrics import accuracy_score, log_loss, mean_squared_error, r2_score from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split from sklearn.multioutput import ClassifierChain, MultiOutputClassifier, MultiOutputRegressor, RegressorChain -from sklearn.utils.estimator_checks import parametrize_with_checks +from 
sklearn.utils.estimator_checks import parametrize_with_checks as sklearn_parametrize_with_checks from sklearn.utils.validation import check_is_fitted import lightgbm as lgb -from lightgbm.compat import DATATABLE_INSTALLED, PANDAS_INSTALLED, dt_DataTable, pd_DataFrame, pd_Series +from lightgbm.compat import ( + DATATABLE_INSTALLED, + PANDAS_INSTALLED, + _sklearn_version, + dt_DataTable, + pd_DataFrame, + pd_Series, +) from .utils import ( assert_silent, @@ -35,6 +42,9 @@ softmax, ) +SKLEARN_MAJOR, SKLEARN_MINOR, *_ = _sklearn_version.split(".") +SKLEARN_VERSION_GTE_1_6 = (int(SKLEARN_MAJOR), int(SKLEARN_MINOR)) >= (1, 6) + decreasing_generator = itertools.count(0, -1) estimator_classes = (lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker) task_to_model_factory = { @@ -1432,7 +1442,28 @@ def test_getting_feature_names_in_pd_input(estimator_class): np.testing.assert_array_equal(model.feature_names_in_, X.columns) -@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()]) +# Starting with scikit-learn 1.6 (https://github.com/scikit-learn/scikit-learn/pull/30149), +# the only API for marking estimator tests as expected to fail is to pass a keyword argument +# to parametrize_with_checks(). That function didn't accept additional arguments in earlier +# versions. +# +# This block defines a patched version of parametrize_with_checks() so lightgbm's tests +# can be compatible with scikit-learn <1.6 and >=1.6. +# +# This should be removed once minimum supported scikit-learn version is at least 1.6. +if SKLEARN_VERSION_GTE_1_6: + parametrize_with_checks = sklearn_parametrize_with_checks +else: + + def parametrize_with_checks(estimator, *args, **kwargs): + return sklearn_parametrize_with_checks(estimator) + + +def _get_expected_failed_tests(estimator): + return estimator._more_tags()["_xfail_checks"] + + +@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()], expected_failed_checks=_get_expected_failed_tests) def test_sklearn_integration(estimator, check): estimator.set_params(min_child_samples=1, min_data_in_bin=1) check(estimator) @@ -1457,7 +1488,6 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True - assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"] @pytest.mark.parametrize("task", all_tasks) From 83c0ff3de1925b0e2d4831a9ccb6ffc196aa795b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 15 Nov 2024 06:30:23 -0600 Subject: [PATCH 12/27] [docs] add note about pyodide support (#6715) * [docs] add note about pyodide support * Update README.md Co-authored-by: Nikita Titov --------- Co-authored-by: Nikita Titov --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f151c9db2ebe..39108559e8bc 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,8 @@ lightgbm-transform (feature transformation binding): https://github.com/microsof `postgresml` (LightGBM training and prediction in SQL, via a Postgres extension): https://github.com/postgresml/postgresml +`pyodide` (run `lightgbm` Python-package in a web browser): https://github.com/pyodide/pyodide + `vaex-ml` (Python DataFrame library with its own interface to LightGBM): https://github.com/vaexio/vaex Support From 27b00d74169ac7756c48d7b6878d66fa5d678530 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 29 Nov 2024 23:34:39 -0600 Subject: [PATCH 13/27] [ci] [python-package] 
[R-package] adapt to scikit-learn check_sample_weight_equivalence changes, stop testing against R 3.6 on Linux (#6733) --- .ci/install-old-r-packages.R | 79 ------------------------------ .ci/test-r-package.sh | 21 ++------ .github/workflows/r_package.yml | 19 +------ python-package/lightgbm/sklearn.py | 18 ++++--- 4 files changed, 18 insertions(+), 119 deletions(-) delete mode 100644 .ci/install-old-r-packages.R diff --git a/.ci/install-old-r-packages.R b/.ci/install-old-r-packages.R deleted file mode 100644 index e402c4d5ca12..000000000000 --- a/.ci/install-old-r-packages.R +++ /dev/null @@ -1,79 +0,0 @@ -# [description] -# -# Installs a pinned set of packages that worked together -# as of the last R 3.6 release. -# - -.install_packages <- function(packages) { - install.packages( # nolint: undesirable_function - pkgs = paste( # nolint: paste - "https://cran.r-project.org/src/contrib/Archive" - , packages - , sep = "/" - ) - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL - ) -} - -# when confronted with a bunch of URLs like this, install.packages() sometimes -# struggles to determine install order... so install packages in batches here, -# starting from the root of the dependency graph and working up - -# there was only a single release of {praise}, so there is no contrib/Archive URL for it -install.packages( # nolint: undesirable_function - pkgs = "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz" - , dependencies = FALSE - , lib = Sys.getenv("R_LIBS") - , repos = NULL -) - -.install_packages(c( - "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path - , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path - , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path - , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path - , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path - , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path - , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path - , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path - , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path - , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path - , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path - , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path - , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path - , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path - , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path - , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path - , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path - , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path - , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path - , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path - , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path - , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path - , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path - , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path - , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path -)) - -.install_packages(c( - "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path - , "testthat/testthat_3.2.1.tar.gz" # nolint: 
non_portable_path -)) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index a076fab0186c..2e414ec0d282 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -20,12 +20,7 @@ fi # Get details needed for installing R components R_MAJOR_VERSION="${R_VERSION%.*}" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - export R_MAC_VERSION=3.6.3 - export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/R-${R_MAC_VERSION}.nn.pkg - export R_LINUX_VERSION="3.6.3-1bionic" - export R_APT_REPO="bionic-cran35/" -elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then +if [[ "${R_MAJOR_VERSION}" == "4" ]]; then export R_MAC_VERSION=4.3.1 export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-${ARCH}/base/R-${R_MAC_VERSION}-${ARCH}.pkg export R_LINUX_VERSION="4.3.1-1.2204.0" @@ -108,16 +103,10 @@ if [[ $OS_NAME == "macos" ]]; then export R_TIDYCMD=/usr/local/bin/tidy fi -# fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6 -# "Warning: dependency ‘lattice’ is not available" -if [[ "${R_MAJOR_VERSION}" == "3" ]]; then - Rscript --vanilla ./.ci/install-old-r-packages.R -else - # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. - # This should be unnecessary on R >=4.4.0 - # ref: https://github.com/microsoft/LightGBM/issues/6433 - Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" -fi +# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. +# This should be unnecessary on R >=4.4.0 +# ref: https://github.com/microsoft/LightGBM/issues/6433 +Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')" # manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0 # ref: https://github.com/microsoft/LightGBM/issues/6433 diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 8811f53b61c0..66e05a18ba1f 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -14,10 +14,6 @@ concurrency: cancel-in-progress: true env: - # https://github.com/actions/checkout/issues/1590#issuecomment-2207052044 - # - # this could be removed (hopefully) when R 3.6 support is removed - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # in CMake-driven builds, parallelize compilation CMAKE_BUILD_PARALLEL_LEVEL: 4 # on Debian-based images, avoid interactive prompts @@ -48,12 +44,6 @@ jobs: ################ # CMake builds # ################ - - os: ubuntu-latest - task: r-package - compiler: gcc - r_version: 3.6 - build_type: cmake - container: 'ubuntu:18.04' - os: ubuntu-latest task: r-package compiler: gcc @@ -174,19 +164,12 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 5 submodules: true - name: Install pandoc uses: r-lib/actions/setup-pandoc@v2 - if: matrix.container != 'ubuntu:18.04' - # R 3.6 binary isn't easily available on Ubuntu 18.04, - # but setup-pandoc>=2.7.1 is uses a too-new glibc for it. 
- # ref: https://github.com/microsoft/LightGBM/issues/6298 - - name: Install pandoc - uses: r-lib/actions/setup-pandoc@v2.6.0 - if: matrix.container == 'ubuntu:18.04' - name: Install tinytex if: startsWith(matrix.os, 'windows') uses: r-lib/actions/setup-tinytex@v2 diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 614e3c3cbe7f..d730b66c3556 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -673,6 +673,15 @@ def __init__( # is >=1.6. # ref: https://github.com/microsoft/LightGBM/pull/6651 def _more_tags(self) -> Dict[str, Any]: + check_sample_weight_str = ( + "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " + "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " + "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " + "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." + ) + # "check_sample_weight_equivalence" can be removed when lightgbm's + # minimum supported scikit-learn version is at least 1.6 + # ref: https://github.com/scikit-learn/scikit-learn/pull/30137 return { "allow_nan": True, "X_types": ["2darray", "sparse", "1dlabels"], @@ -680,12 +689,9 @@ def _more_tags(self) -> Dict[str, Any]: "check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes " "cannot be set in __init__: " "(see https://github.com/microsoft/LightGBM/issues/2628)", - "check_sample_weight_equivalence": ( - "In LightGBM, setting a sample's weight to 0 can produce a different result than omitting the sample. " - "Such samples intentionally still affect count-based measures like 'min_data_in_leaf' " - "(https://github.com/microsoft/LightGBM/issues/5626#issuecomment-1712706678) and the estimated distribution " - "of features for Dataset construction (see https://github.com/microsoft/LightGBM/issues/5553)." 
- ), + "check_sample_weight_equivalence": check_sample_weight_str, + "check_sample_weight_equivalence_on_dense_data": check_sample_weight_str, + "check_sample_weight_equivalence_on_sparse_data": check_sample_weight_str, }, } From 784f38415d4dd08ccefe2a536d08971020672cca Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Sun, 1 Dec 2024 05:51:56 +0100 Subject: [PATCH 14/27] [ci] Introduce `typos` pre-commit hook (#6564) Co-authored-by: Nikita Titov --- .ci/test-r-package-windows.ps1 | 2 +- .github/workflows/lock.yml | 2 +- .pre-commit-config.yaml | 8 ++++- .typos.toml | 21 +++++++++++ CMakeLists.txt | 2 +- R-package/R/lgb.Booster.R | 4 +-- R-package/R/lgb.importance.R | 2 +- R-package/R/lgb.model.dt.tree.R | 2 +- R-package/R/lightgbm.R | 2 +- R-package/demo/cross_validation.R | 2 +- R-package/demo/early_stopping.R | 2 +- R-package/man/lgb.configure_fast_predict.Rd | 4 +-- R-package/man/lgb.importance.Rd | 2 +- R-package/man/lgb.model.dt.tree.Rd | 2 +- R-package/man/lightgbm.Rd | 2 +- R-package/tests/testthat/test_basic.R | 4 +-- .../tests/testthat/test_custom_objective.R | 2 +- .../tests/testthat/test_lgb.interprete.R | 2 +- .../testthat/test_lgb.plot.interpretation.R | 4 +-- cmake/Sanitizer.cmake | 2 +- docker/README.md | 4 +-- docs/Parameters.rst | 2 +- docs/_static/js/script.js | 4 +-- examples/lambdarank/train.conf | 2 +- examples/regression/train.conf | 10 +++--- include/LightGBM/cuda/cuda_algorithms.hpp | 6 ++-- include/LightGBM/dataset.h | 2 +- include/LightGBM/utils/common.h | 4 +-- include/LightGBM/utils/random.h | 4 +-- python-package/lightgbm/basic.py | 2 +- python-package/lightgbm/dask.py | 2 +- src/boosting/bagging.hpp | 10 +++--- src/boosting/gbdt_model_text.cpp | 6 ++-- src/io/metadata.cpp | 6 ++-- src/network/linker_topo.cpp | 4 +-- src/objective/rank_objective.hpp | 2 +- .../cuda/cuda_best_split_finder.cpp | 2 +- src/treelearner/cuda/cuda_data_partition.cu | 18 +++++----- src/treelearner/cuda/cuda_data_partition.hpp | 18 +++++----- .../cuda/cuda_histogram_constructor.cpp | 2 +- .../cuda/cuda_histogram_constructor.hpp | 2 +- src/treelearner/cuda/cuda_leaf_splits.cpp | 8 ++--- src/treelearner/cuda/cuda_leaf_splits.cu | 16 ++++----- src/treelearner/cuda/cuda_leaf_splits.hpp | 6 ++-- .../data_parallel_tree_learner.cpp | 6 ++-- src/treelearner/feature_histogram.hpp | 6 ++-- src/treelearner/gpu_tree_learner.cpp | 2 +- .../kernels/histogram_16_64_256.cu | 4 +-- src/treelearner/ocl/histogram16.cl | 4 +-- src/treelearner/ocl/histogram256.cl | 2 +- src/treelearner/ocl/histogram64.cl | 2 +- src/treelearner/parallel_tree_learner.h | 8 ++--- src/treelearner/serial_tree_learner.cpp | 14 ++++---- .../voting_parallel_tree_learner.cpp | 12 +++---- tests/cpp_tests/test_chunked_array.cpp | 8 ++--- tests/cpp_tests/test_stream.cpp | 36 +++++++++---------- tests/python_package_test/test_dask.py | 2 +- tests/python_package_test/test_engine.py | 2 +- 58 files changed, 175 insertions(+), 148 deletions(-) create mode 100644 .typos.toml diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index 1ce698a49c72..a3f524b60be7 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -171,7 +171,7 @@ Write-Output "Done installing Rtools" Write-Output "Installing CMake" Add-Type -AssemblyName System.IO.Compression.FileSystem [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $? 
-# Remove old CMake shiped with RTools +# Remove old CMake shipped with RTools Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore Write-Output "Done installing CMake" diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml index 4efe658b7f45..195fd5f1c8f1 100644 --- a/.github/workflows/lock.yml +++ b/.github/workflows/lock.yml @@ -39,7 +39,7 @@ jobs: This pull request has been automatically locked since there has not been any recent activity since it was closed. To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues including a reference to this. - # what shoulld the locking status be? + # what should the locking status be? issue-lock-reason: 'resolved' pr-lock-reason: 'resolved' process-only: 'issues, prs' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e5e5dd8e9d9..b334db19b8e7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,4 +38,10 @@ repos: - repo: https://github.com/shellcheck-py/shellcheck-py rev: v0.10.0.1 hooks: - - id: shellcheck + - id: shellcheck + - repo: https://github.com/crate-ci/typos + rev: v1.23.2 + hooks: + - id: typos + args: ["--force-exclude"] + exclude: (\.gitignore$)|(^\.editorconfig$) diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 000000000000..6dc2c2c97529 --- /dev/null +++ b/.typos.toml @@ -0,0 +1,21 @@ +default.extend-ignore-re = [ + "/Ot", + "mis-alignment", + "mis-spelled", + "posix-seh-rt", +] + +[default.extend-words] +MAPE = "MAPE" +datas = "datas" +interprete = "interprete" +mape = "mape" +splitted = "splitted" + +[default.extend-identifiers] +ERRORs = "ERRORs" +GAM = "GAM" +ND24s = "ND24s" +WARNINGs = "WARNINGs" +fullset = "fullset" +thess = "thess" diff --git a/CMakeLists.txt b/CMakeLists.txt index 183ef62bd68e..4f57cf9622e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ option(USE_SWIG "Enable SWIG to generate Java API" OFF) option(USE_TIMETAG "Set to ON to output time costs" OFF) option(USE_CUDA "Enable CUDA-accelerated training " OFF) option(USE_DEBUG "Set to ON for Debug mode" OFF) -option(USE_SANITIZER "Use santizer flags" OFF) +option(USE_SANITIZER "Use sanitizer flags" OFF) set( ENABLED_SANITIZERS "address" "leak" "undefined" diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index a13516ff6569..85a91b1ce058 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -1114,7 +1114,7 @@ predict.lgb.Booster <- function(object, #' #' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} #' will cause it to ignore the fast-predict configuration and take the slow route instead -#' (but be aware that an existing configuration might not always be overriden by supplying +#' (but be aware that an existing configuration might not always be overridden by supplying #' different parameters or prediction type, so make sure to check that the output is what #' was expected when a prediction is to be made on a single row for something different than #' what is configured). @@ -1128,7 +1128,7 @@ predict.lgb.Booster <- function(object, #' and as such, this function will produce an error if passing \code{csr=TRUE} and #' \code{type = "contrib"} together. #' @inheritParams lgb_predict_shared_params -#' @param model LighGBM model object (class \code{lgb.Booster}). +#' @param model LightGBM model object (class \code{lgb.Booster}). #' #' \bold{The object will be modified in-place}. 
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs. diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 7c76131f4f53..d60507cf00d4 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -9,7 +9,7 @@ #' \item{\code{Feature}: Feature names in the model.} #' \item{\code{Gain}: The total gain of this feature's splits.} #' \item{\code{Cover}: The number of observation related to this feature.} -#' \item{\code{Frequency}: The number of times a feature splited in trees.} +#' \item{\code{Frequency}: The number of times a feature split in trees.} #' } #' #' @examples diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index db4ef955f866..ac1b2f9aaf14 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -10,7 +10,7 @@ #' \emph{New in version 4.4.0} #' #' @return -#' A \code{data.table} with detailed information about model trees' nodes and leafs. +#' A \code{data.table} with detailed information about model trees' nodes and leaves. #' #' The columns of the \code{data.table} are: #' diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index efa593ffe12f..6cb4eebd8baf 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -139,7 +139,7 @@ NULL #' system, but be aware that getting the number of cores detected correctly requires package #' \code{RhpcBLASctl} to be installed. #' -#' This parameter gets overriden by \code{num_threads} and its aliases under \code{params} +#' This parameter gets overridden by \code{num_threads} and its aliases under \code{params} #' if passed there. #' #' \emph{New in version 4.0.0} diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 0324f83f2da9..9f74ef7f4b2a 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -51,7 +51,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 6ca214c5ac7b..4435dd1b09b6 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -29,7 +29,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # The built-in evaluation error assumes input is after logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index e02600451df5..9cd4339bdced 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ 
b/R-package/man/lgb.configure_fast_predict.Rd @@ -14,7 +14,7 @@ lgb.configure_fast_predict( ) } \arguments{ -\item{model}{LighGBM model object (class \code{lgb.Booster}). +\item{model}{LightGBM model object (class \code{lgb.Booster}). \bold{The object will be modified in-place}.} @@ -98,7 +98,7 @@ Calling this function multiple times with different parameters might not overrid Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} will cause it to ignore the fast-predict configuration and take the slow route instead - (but be aware that an existing configuration might not always be overriden by supplying + (but be aware that an existing configuration might not always be overridden by supplying different parameters or prediction type, so make sure to check that the output is what was expected when a prediction is to be made on a single row for something different than what is configured). diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 79cb82f5d8ef..5099643112be 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -17,7 +17,7 @@ For a tree model, a \code{data.table} with the following columns: \item{\code{Feature}: Feature names in the model.} \item{\code{Gain}: The total gain of this feature's splits.} \item{\code{Cover}: The number of observation related to this feature.} - \item{\code{Frequency}: The number of times a feature splited in trees.} + \item{\code{Frequency}: The number of times a feature split in trees.} } } \description{ diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index ecfee17332f5..df36b6a94f42 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -18,7 +18,7 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L) \emph{New in version 4.4.0}} } \value{ -A \code{data.table} with detailed information about model trees' nodes and leafs. +A \code{data.table} with detailed information about model trees' nodes and leaves. The columns of the \code{data.table} are: diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 90cb3166bf5c..376a6d03a6b1 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -93,7 +93,7 @@ set to the iteration number of the best iteration.} system, but be aware that getting the number of cores detected correctly requires package \code{RhpcBLASctl} to be installed. - This parameter gets overriden by \code{num_threads} and its aliases under \code{params} + This parameter gets overridden by \code{num_threads} and its aliases under \code{params} if passed there. \emph{New in version 4.0.0}} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index c734816b4038..7310815c4a6d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -9,7 +9,7 @@ set.seed(708L) # to an accumulator then returns the current value. 
# This is used to mock the situation where an evaluation # metric increases every iteration -ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACCUMULATOR" assign(x = ACCUMULATOR_NAME, value = 0.0, envir = .GlobalEnv) .increasing_metric <- function(preds, dtrain) { @@ -1777,7 +1777,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th , early_stopping_rounds + 1L ) - # Booster should understand thatt all three of these metrics should be minimized + # Booster should understand that all three of these metrics should be minimized eval_info <- bst$.__enclos_env__$private$get_eval_info() expect_identical(eval_info, c("mape", "rmse", "l1")) expect_identical( diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 2c10b9d571dc..a1baf0067c4a 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -14,7 +14,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- get_field(dtrain, "label") diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index 322a80a55bc5..cfcd1c942f31 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.intereprete works as expected for binary classification", { +test_that("lgb.interprete works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/tests/testthat/test_lgb.plot.interpretation.R b/R-package/tests/testthat/test_lgb.plot.interpretation.R index 6cba9927942a..e8a021fc7237 100644 --- a/R-package/tests/testthat/test_lgb.plot.interpretation.R +++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.plot.interepretation works as expected for binary classification", { +test_that("lgb.plot.interpretation works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -57,7 +57,7 @@ test_that("lgb.plot.interepretation works as expected for binary classification" expect_null(plot_res) }) -test_that("lgb.plot.interepretation works as expected for multiclass classification", { +test_that("lgb.plot.interpretation works as expected for multiclass classification", { data(iris) # We must convert factors to numeric diff --git a/cmake/Sanitizer.cmake b/cmake/Sanitizer.cmake index a3768effac0d..f99048476d8b 100644 --- a/cmake/Sanitizer.cmake +++ b/cmake/Sanitizer.cmake @@ -18,7 +18,7 @@ macro(enable_sanitizer sanitizer) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") else() - message(FATAL_ERROR "Santizer ${sanitizer} not supported.") + message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") endif() endmacro() diff --git 
a/docker/README.md b/docker/README.md index dfedc2f4e3f1..e68346545ccf 100644 --- a/docker/README.md +++ b/docker/README.md @@ -55,7 +55,7 @@ After this runs, a LightGBM model can be found at `LightGBM-CLI-model.txt`. For more details on how to configure and use the LightGBM CLI, see https://lightgbm.readthedocs.io/en/latest/Quick-Start.html. -## Running the Python-package Сontainer +## Running the Python-package Container Build an image with the LightGBM Python-package installed. @@ -114,7 +114,7 @@ docker run \ python ``` -## Running the R-package Сontainer +## Running the R-package Container Build an image with the LightGBM R-package installed. diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 1f80a13d5731..b44d90ecec10 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -35,7 +35,7 @@ For example, in Python: .. code-block:: python - # use learning rate of 0.07, becase 'learning_rate' + # use learning rate of 0.07, because 'learning_rate' # is the primary parameter name lgb.train( params={ diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 3f129501e06f..c4717b8a0ee5 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -17,7 +17,7 @@ $(() => { $( '', ).appendTo("body"); - const collapsable = [ + const collapsible = [ "#build-threadless-version-not-recommended", "#build-mpi-version", "#build-gpu-version", @@ -25,7 +25,7 @@ $(() => { "#build-java-wrapper", "#build-c-unit-tests", ]; - $.each(collapsable, (_, val) => { + $.each(collapsible, (_, val) => { const header = `${val} > :header:first`; const content = `${val} :not(:header:first)`; $(header).addClass("closed"); diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index 2aa2113b40d4..f007dcd6fe66 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -64,7 +64,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. # num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration diff --git a/examples/regression/train.conf b/examples/regression/train.conf index cd910af61dcf..992bc6c9ab53 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -20,7 +20,7 @@ objective = regression # binary_error metric = l2 -# frequence for metric output +# frequency for metric output metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric @@ -36,12 +36,12 @@ max_bin = 255 # forcedbins_filename = forced_bins.json # training data -# if exsting weight file, should name to "regression.train.weight" +# if existing weight file, should name to "regression.train.weight" # alias: train_data, train data = regression.train # validation data, support multi validation data, separated by ',' -# if exsting weight file, should name to "regression.test.weight" +# if existing weight file, should name to "regression.test.weight" # alias: valid, test, test_data, valid_data = regression.test @@ -62,7 +62,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. 
# num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration @@ -72,7 +72,7 @@ feature_fraction = 0.9 # Support bagging (data sub-sample), will perform bagging every 5 iterations bagging_freq = 5 -# Bagging farction, will random select 80% data on bagging +# Bagging fraction, will random select 80% data on bagging # alias: sub_row bagging_fraction = 0.8 diff --git a/include/LightGBM/cuda/cuda_algorithms.hpp b/include/LightGBM/cuda/cuda_algorithms.hpp index f79fc57e4f42..abda07b1582f 100644 --- a/include/LightGBM/cuda/cuda_algorithms.hpp +++ b/include/LightGBM/cuda/cuda_algorithms.hpp @@ -115,7 +115,7 @@ __device__ __forceinline__ T ShuffleReduceSumWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceSum(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -145,7 +145,7 @@ __device__ __forceinline__ T ShuffleReduceMaxWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceMax(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -196,7 +196,7 @@ __device__ __forceinline__ T ShuffleReduceMinWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template <typename T> __device__ __forceinline__ T ShuffleReduceMin(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 220a1f9f009c..ef214b7cd89d 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -376,7 +376,7 @@ class Metadata { std::vector<data_size_t> query_boundaries_; /*! \brief Query weights */ std::vector<label_t> query_weights_; - /*! \brief Number of querys */ + /*! \brief Number of queries */ data_size_t num_queries_; /*! \brief Number of Initial score, used to check correct weight file */ int64_t num_init_score_; diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 6c3ebf5d0096..67bc07b0ecd5 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -925,11 +925,11 @@ class AlignmentAllocator { inline ~AlignmentAllocator() throw() {} - inline pointer adress(reference r) { + inline pointer address(reference r) { return &r; } - inline const_pointer adress(const_reference r) const { + inline const_pointer address(const_reference r) const { return &r; } diff --git a/include/LightGBM/utils/random.h b/include/LightGBM/utils/random.h index 6f89f935b310..eb115ea96644 100644 --- a/include/LightGBM/utils/random.h +++ b/include/LightGBM/utils/random.h @@ -22,9 +22,9 @@ class Random { */ Random() { std::random_device rd; - auto genrator = std::mt19937(rd()); + auto generator = std::mt19937(rd()); std::uniform_int_distribution<int> distribution(0, x); - x = distribution(genrator); + x = distribution(generator); } /*!
* \brief Constructor, with specific seed diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index cf3723aadc63..99a690f38993 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -3525,7 +3525,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset": _log_warning(err_msg) self.feature_name = self.get_feature_name() _log_warning( - "Reseting categorical features.\n" + "Resetting categorical features.\n" "You can set new categorical features via ``set_categorical_feature`` method" ) self.categorical_feature = "auto" diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index e15979bc40db..dcdacba7366c 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -967,7 +967,7 @@ def _extract(items: List[Any], i: int) -> Any: out[i].append(part) # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix - # the code below is used instead to ensure that the sparse type is preserved during concatentation + # the code below is used instead to ensure that the sparse type is preserved during concatenation if isinstance(pred_meta, ss.csr_matrix): concat_fn = partial(ss.vstack, format="csr") elif isinstance(pred_meta, ss.csc_matrix): diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp index 7a66b5696425..451384e6850a 100644 --- a/src/boosting/bagging.hpp +++ b/src/boosting/bagging.hpp @@ -73,17 +73,17 @@ class BaggingSampleStrategy : public SampleStrategy { for (data_size_t i = start_index + 1; i < end_index; ++i) { sampled_query_boundaries_[i] += sampled_query_boundaries_[i - 1]; } - sampled_query_boundaires_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; }); for (int thread_index = 1; thread_index < num_blocks; ++thread_index) { - sampled_query_boundaires_thread_buffer_[thread_index] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } Threading::For(0, num_sampled_queries_ + 1, 128, [this](int thread_index, data_size_t start_index, data_size_t end_index) { if (thread_index > 0) { for (data_size_t i = start_index; i < end_index; ++i) { - sampled_query_boundaries_[i] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_[i] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } } }); @@ -171,7 +171,7 @@ class BaggingSampleStrategy : public SampleStrategy { } else { bagging_runner_.ReSize(num_queries_); sampled_query_boundaries_.resize(num_queries_ + 1, 0); - sampled_query_boundaires_thread_buffer_.resize(num_threads_, 0); + sampled_query_boundaries_thread_buffer_.resize(num_threads_, 0); bag_query_indices_.resize(num_data_); } bagging_rands_.clear(); @@ -280,7 +280,7 @@ class BaggingSampleStrategy : public SampleStrategy { /*! \brief query boundaries of the in-bag queries */ std::vector sampled_query_boundaries_; /*! \brief buffer for calculating sampled_query_boundaries_ */ - std::vector sampled_query_boundaires_thread_buffer_; + std::vector sampled_query_boundaries_thread_buffer_; /*! \brief in-bag query indices */ std::vector> bag_query_indices_; /*! 
\brief number of queries in the training dataset */ diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 27be5afe066e..e8b6dd2332ef 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -545,17 +545,17 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { } } else { std::vector<size_t> tree_sizes = CommonC::StringToArray<size_t>(key_vals["tree_sizes"].c_str(), ' '); - std::vector<size_t> tree_boundries(tree_sizes.size() + 1, 0); + std::vector<size_t> tree_boundaries(tree_sizes.size() + 1, 0); int num_trees = static_cast<int>(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { - tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i]; + tree_boundaries[i + 1] = tree_boundaries[i] + tree_sizes[i]; models_.emplace_back(nullptr); } OMP_INIT_EX(); #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_trees; ++i) { OMP_LOOP_EX_BEGIN(); - auto cur_p = p + tree_boundries[i]; + auto cur_p = p + tree_boundaries[i]; auto line_len = Common::GetLine(cur_p); std::string cur_line(cur_p, line_len); if (Common::StartsWith(cur_line, "Tree=")) { diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index f46e6d1c9f14..f6f07c434661 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -225,7 +225,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector(metadata.num_position_ids()); - // get boundries + // get boundaries query_boundaries_ = metadata.query_boundaries(); if (query_boundaries_ == nullptr) { Log::Fatal("Ranking tasks require query information"); } diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp index 95758542849c..e272ce744b1a 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.cpp +++ b/src/treelearner/cuda/cuda_best_split_finder.cpp @@ -120,7 +120,7 @@ void CUDABestSplitFinder::Init() { void CUDABestSplitFinder::InitCUDAFeatureMetaInfo() { AllocateCUDAMemory(&cuda_is_feature_used_bytree_, static_cast<size_t>(num_features_), __FILE__, __LINE__); - // intialize split find task information (a split find task is one pass through the histogram of a feature) + // initialize split find task information (a split find task is one pass through the histogram of a feature) num_tasks_ = 0; for (int inner_feature_index = 0; inner_feature_index < num_features_; ++inner_feature_index) { const uint32_t num_bin = feature_num_bins_[inner_feature_index]; diff --git a/src/treelearner/cuda/cuda_data_partition.cu b/src/treelearner/cuda/cuda_data_partition.cu index 3090b7a84176..4ca9d9279443 100644 --- a/src/treelearner/cuda/cuda_data_partition.cu +++ b/src/treelearner/cuda/cuda_data_partition.cu @@ -262,7 +262,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( } } -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -286,7 +286,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( template __global__ void GenDataToLeftBitVectorKernel( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, uint16_t* block_to_left_offset, data_size_t* block_to_left_offset_buffer, data_size_t* block_to_right_offset_buffer) { @@ -335,7 +335,7 @@ __global__ void GenDataToLeftBitVectorKernel( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( -
GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -363,7 +363,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -380,7 +380,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -396,7 +396,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const bool max_bin_to_left, const bool is_single_feature_in_column) { @@ -413,7 +413,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column) { if (!max_bin_to_left) { @@ -429,7 +429,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column) { if (!is_single_feature_in_column) { GenDataToLeftBitVectorKernel @@ -548,7 +548,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernel( #undef UpdateDataIndexToLeafIndexKernel_PARAMS #undef UpdateDataIndexToLeafIndex_ARGS -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #undef GenBitVector_ARGS template diff --git a/src/treelearner/cuda/cuda_data_partition.hpp b/src/treelearner/cuda/cuda_data_partition.hpp index f6bbab9b8c65..bfcce89af243 100644 --- a/src/treelearner/cuda/cuda_data_partition.hpp +++ b/src/treelearner/cuda/cuda_data_partition.hpp @@ -174,7 +174,7 @@ class CUDADataPartition { const int left_leaf_index, const int right_leaf_index); -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -187,7 +187,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -197,7 +197,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -206,7 +206,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -214,23 +214,23 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const 
bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column); -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #define UpdateDataIndexToLeafIndexKernel_PARAMS \ const BIN_TYPE* column_data, \ @@ -379,7 +379,7 @@ class CUDADataPartition { int* cuda_split_info_buffer_; // dataset information - /*! \brief number of data in training set, for intialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ + /*! \brief number of data in training set, for initialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ data_size_t* cuda_num_data_; diff --git a/src/treelearner/cuda/cuda_histogram_constructor.cpp b/src/treelearner/cuda/cuda_histogram_constructor.cpp index 659db2aad24c..9f42eadec6f7 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.cpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.cpp @@ -150,7 +150,7 @@ void CUDAHistogramConstructor::CalcConstructHistogramKernelDim( int* block_dim_y, const data_size_t num_data_in_smaller_leaf) { *block_dim_x = cuda_row_data_->max_num_column_per_partition(); - *block_dim_y = NUM_THRADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); + *block_dim_y = NUM_THREADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); *grid_dim_x = cuda_row_data_->num_feature_partitions(); *grid_dim_y = std::max(min_grid_dim_y_, ((num_data_in_smaller_leaf + NUM_DATA_PER_THREAD - 1) / NUM_DATA_PER_THREAD + (*block_dim_y) - 1) / (*block_dim_y)); diff --git a/src/treelearner/cuda/cuda_histogram_constructor.hpp b/src/treelearner/cuda/cuda_histogram_constructor.hpp index ddc78cb17d90..655029d23ba5 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.hpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.hpp @@ -19,7 +19,7 @@ #include "cuda_leaf_splits.hpp" #define NUM_DATA_PER_THREAD (400) -#define NUM_THRADS_PER_BLOCK (504) +#define NUM_THREADS_PER_BLOCK (504) #define NUM_FEATURE_PER_THREAD_GROUP (28) #define SUBTRACT_BLOCK_SIZE (1024) #define FIX_HISTOGRAM_SHARED_MEM_SIZE (1024) diff --git a/src/treelearner/cuda/cuda_leaf_splits.cpp b/src/treelearner/cuda/cuda_leaf_splits.cpp index 803d4674ee48..2bdd0d47fae1 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cpp +++ b/src/treelearner/cuda/cuda_leaf_splits.cpp @@ -16,7 +16,7 @@ num_data_(num_data) {} CUDALeafSplits::~CUDALeafSplits() {} void CUDALeafSplits::Init(const bool use_quantized_grad) { - num_blocks_init_from_gradients_ = (num_data_ + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data_ + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; // allocate more memory for sum reduction in CUDA // only the first element records the final sum @@ -44,7 +44,7 @@ void CUDALeafSplits::InitValues( cuda_hessians_ = cuda_hessians; cuda_sum_of_gradients_buffer_.SetValue(0); cuda_sum_of_hessians_buffer_.SetValue(0); - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, 
cuda_hist_in_leaf); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -59,7 +59,7 @@ void CUDALeafSplits::InitValues( const score_t* grad_scale, const score_t* hess_scale) { cuda_gradients_ = reinterpret_cast(cuda_gradients_and_hessians); cuda_hessians_ = nullptr; - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -67,7 +67,7 @@ void CUDALeafSplits::InitValues( void CUDALeafSplits::Resize(const data_size_t num_data) { num_data_ = num_data; - num_blocks_init_from_gradients_ = (num_data + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; cuda_sum_of_gradients_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_gradients_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); diff --git a/src/treelearner/cuda/cuda_leaf_splits.cu b/src/treelearner/cuda/cuda_leaf_splits.cu index ae505ecd55dd..0c796be9f20a 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cu +++ b/src/treelearner/cuda/cuda_leaf_splits.cu @@ -180,23 +180,23 @@ void CUDALeafSplits::LaunchInitValuesEmptyKernel() { InitValuesEmptyKernel<<<1, 1>>>(cuda_struct_.RawData()); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } else { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel2<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel2<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), @@ -208,7 +208,7 @@ void CUDALeafSplits::LaunchInitValuesKernal( SynchronizeCUDADevice(__FILE__, __LINE__); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, @@ -217,17 +217,17 @@ void CUDALeafSplits::LaunchInitValuesKernal( const score_t* grad_scale, const 
score_t* hess_scale) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } else { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel4<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel4<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), diff --git a/src/treelearner/cuda/cuda_leaf_splits.hpp b/src/treelearner/cuda/cuda_leaf_splits.hpp index c2635346098b..43a0492452bd 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.hpp +++ b/src/treelearner/cuda/cuda_leaf_splits.hpp @@ -13,7 +13,7 @@ #include #include -#define NUM_THRADS_PER_BLOCK_LEAF_SPLITS (1024) +#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024) #define NUM_DATA_THREAD_ADD_LEAF_SPLITS (6) namespace LightGBM { @@ -142,14 +142,14 @@ class CUDALeafSplits { private: void LaunchInitValuesEmptyKernel(); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 64c342e5b01d..670788118455 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -260,12 +260,12 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { if (smaller_leaf_num_bits <= 16) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_int16_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt16(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histogram()); } else { if (local_smaller_leaf_num_bits == 32) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt32(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histogram()); } else { this->smaller_leaf_histogram_array_[feature_index].CopyFromInt16ToInt32( input_buffer_.data() + buffer_write_start_pos_[feature_index]); @@ -274,7 +274,7 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { } else { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawData(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfHistgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfHistogram()); } } global_timer.Stop("DataParallelTreeLearner::ReduceHistogram::Copy"); diff --git 
a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 70dd0fb5436f..2d4abbd27af1 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -668,15 +668,15 @@ class FeatureHistogram { /*! * \brief Binary size of this histogram */ - int SizeOfHistgram() const { + int SizeOfHistogram() const { return (meta_->num_bin - meta_->offset) * kHistEntrySize; } - int SizeOfInt32Histgram() const { + int SizeOfInt32Histogram() const { return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize; } - int SizeOfInt16Histgram() const { + int SizeOfInt16Histogram() const { return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize; } diff --git a/src/treelearner/gpu_tree_learner.cpp b/src/treelearner/gpu_tree_learner.cpp index 7c6c811c3b45..1bf21d65ccc6 100644 --- a/src/treelearner/gpu_tree_learner.cpp +++ b/src/treelearner/gpu_tree_learner.cpp @@ -777,7 +777,7 @@ void GPUTreeLearner::ResetIsConstantHessian(bool is_constant_hessian) { void GPUTreeLearner::BeforeTrain() { #if GPU_DEBUG >= 2 - printf("Copying intial full gradients and hessians to device\n"); + printf("Copying initial full gradients and hessians to device\n"); #endif // Copy initial full hessians and gradients to GPU. // We start copying as early as possible, instead of at ConstructHistogram(). diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu index d778d650f722..59662fb19d55 100644 --- a/src/treelearner/kernels/histogram_16_64_256.cu +++ b/src/treelearner/kernels/histogram_16_64_256.cu @@ -508,7 +508,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4 for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) { // prefetch the next iteration variables - // we don't need bondary check because we have made the buffer large + // we don't need boundary check because we have made the buffer large int i_next = i + subglobal_size; #ifdef IGNORE_INDICES // we need to check to bounds here @@ -752,7 +752,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary?? // total size: 2 * 256 * size_of(float) = 2 KB // organization: each feature/grad/hessian is at a different bank, - // as indepedent of the feature value as possible + // as independent of the feature value as possible acc_type *gh_hist = reinterpret_cast(shared_array); // counter histogram diff --git a/src/treelearner/ocl/histogram16.cl b/src/treelearner/ocl/histogram16.cl index 21624ec9ee10..be590c20666b 100644 --- a/src/treelearner/ocl/histogram16.cl +++ b/src/treelearner/ocl/histogram16.cl @@ -8,7 +8,7 @@ #ifndef __OPENCL_VERSION__ // If we are including this file in C++, // the entire source file following (except the last #endif) will become -// a raw string literal. The extra ")" is just for mathcing parentheses +// a raw string literal. The extra ")" is just for matching parentheses // to make the editor happy. The extra ")" and extra endif will be skipped. // DO NOT add anything between here and the next #ifdef, otherwise you need // to modify the skip count at the end of this file. 
@@ -475,7 +475,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/ocl/histogram256.cl b/src/treelearner/ocl/histogram256.cl index 3351f9efa7c3..b5c049e1272d 100644 --- a/src/treelearner/ocl/histogram256.cl +++ b/src/treelearner/ocl/histogram256.cl @@ -387,7 +387,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base, const uint subglobal_tid = gtid - group_feature * subglobal_size; // extract feature mask, when a byte is set to 0, that feature is disabled #if ENABLE_ALL_FEATURES == 1 - // hopefully the compiler will propogate the constants and eliminate all branches + // hopefully the compiler will propagate the constants and eliminate all branches uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff); #else uchar4 feature_mask = feature_masks[group_feature]; diff --git a/src/treelearner/ocl/histogram64.cl b/src/treelearner/ocl/histogram64.cl index 48fa8c506d8b..4ec4d6371df5 100644 --- a/src/treelearner/ocl/histogram64.cl +++ b/src/treelearner/ocl/histogram64.cl @@ -454,7 +454,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/parallel_tree_learner.h b/src/treelearner/parallel_tree_learner.h index b942dceab28b..aff8ac0fd4c5 100644 --- a/src/treelearner/parallel_tree_learner.h +++ b/src/treelearner/parallel_tree_learner.h @@ -148,12 +148,12 @@ class VotingParallelTreeLearner: public TREELEARNER_T { * \brief Perform global voting * \param leaf_idx index of leaf * \param splits All splits from local voting - * \param out Result of gobal voting, only store feature indices + * \param out Result of global voting, only store feature indices */ void GlobalVoting(int leaf_idx, const std::vector& splits, std::vector* out); /*! - * \brief Copy local histgram to buffer + * \brief Copy local histogram to buffer * \param smaller_top_features Selected features for smaller leaf * \param larger_top_features Selected features for larger leaf */ @@ -183,9 +183,9 @@ class VotingParallelTreeLearner: public TREELEARNER_T { std::vector block_start_; /*! \brief Block size for reduce scatter */ std::vector block_len_; - /*! \brief Read positions for feature histgrams at smaller leaf */ + /*! \brief Read positions for feature histograms at smaller leaf */ std::vector smaller_buffer_read_start_pos_; - /*! \brief Read positions for feature histgrams at larger leaf */ + /*! \brief Read positions for feature histograms at larger leaf */ std::vector larger_buffer_read_start_pos_; /*! 
\brief Size for reduce scatter */ comm_size_t reduce_scatter_size_; diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 14ede072dc9e..01cdd7623c02 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -735,24 +735,24 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf, std::set SerialTreeLearner::FindAllForceFeatures(Json force_split_leaf_setting) { std::set force_features; - std::queue force_split_leafs; + std::queue force_split_leaves; - force_split_leafs.push(force_split_leaf_setting); + force_split_leaves.push(force_split_leaf_setting); - while (!force_split_leafs.empty()) { - Json split_leaf = force_split_leafs.front(); - force_split_leafs.pop(); + while (!force_split_leaves.empty()) { + Json split_leaf = force_split_leaves.front(); + force_split_leaves.pop(); const int feature_index = split_leaf["feature"].int_value(); const int feature_inner_index = train_data_->InnerFeatureIndex(feature_index); force_features.insert(feature_inner_index); if (split_leaf.object_items().count("left") > 0) { - force_split_leafs.push(split_leaf["left"]); + force_split_leaves.push(split_leaf["left"]); } if (split_leaf.object_items().count("right") > 0) { - force_split_leafs.push(split_leaf["right"]); + force_split_leaves.push(split_leaf["right"]); } } diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index b88db5a7ba28..37f2d4cf2641 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -207,9 +207,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec smaller_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++smaller_idx; } if (cur_used_features >= cur_total_feature) { @@ -225,9 +225,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec larger_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += 
this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++larger_idx; } } diff --git a/tests/cpp_tests/test_chunked_array.cpp b/tests/cpp_tests/test_chunked_array.cpp index 9bfd857299ab..bc58918082a8 100644 --- a/tests/cpp_tests/test_chunked_array.cpp +++ b/tests/cpp_tests/test_chunked_array.cpp @@ -217,8 +217,8 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { // Number of trials for each new ChunkedArray configuration. Pass 100 times over the search space: const size_t N_TRIALS = MAX_CHUNKS_SEARCH * MAX_IN_CHUNK_SEARCH_IDX * 100; const int INVALID = -1; // A negative value signaling the requested value lives in an invalid address. - const int UNITIALIZED = -99; // A negative value to signal this was never updated. - std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); // Memorize latest inserted values. + const int UNINITIALIZED = -99; // A negative value to signal this was never updated. + std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); // Memorize latest inserted values. // Each outer loop iteration changes the test by adding +1 chunk. We start with 1 chunk only: for (size_t chunks = 1; chunks < MAX_CHUNKS_SEARCH; ++chunks) { @@ -249,10 +249,10 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { } // Final check: ensure even with overrides, all valid insertions store the latest value at that address: - std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); + std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); ca_.coalesce_to(coalesced_out.data(), true); // Export all valid addresses. for (size_t i = 0; i < ref_values.size(); ++i) { - if (ref_values[i] != UNITIALIZED) { + if (ref_values[i] != UNINITIALIZED) { // Test in 2 ways that the values are correctly laid out in memory: EXPECT_EQ(ca_.getitem(i / CHUNK_SIZE, i % CHUNK_SIZE, INVALID), ref_values[i]); EXPECT_EQ(coalesced_out[i], ref_values[i]); diff --git a/tests/cpp_tests/test_stream.cpp b/tests/cpp_tests/test_stream.cpp index bc5f73b0a3ee..a656af1e2fe9 100644 --- a/tests/cpp_tests/test_stream.cpp +++ b/tests/cpp_tests/test_stream.cpp @@ -17,7 +17,7 @@ using LightGBM::TestUtils; void test_stream_dense( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -86,7 +86,7 @@ void test_stream_dense( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows dense data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -131,7 +131,7 @@ void test_stream_dense( void test_stream_sparse( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -203,7 +203,7 @@ void test_stream_sparse( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows sparse data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -249,13 +249,13 @@ void test_stream_sparse( TEST(Stream, 
PushDenseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -266,9 +266,9 @@ TEST(Stream, PushDenseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -290,23 +290,23 @@ TEST(Stream, PushDenseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_dense(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); + test_stream_dense(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } TEST(Stream, PushSparseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -317,9 +317,9 @@ TEST(Stream, PushSparseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", 
unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -344,10 +344,10 @@ TEST(Stream, PushSparseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_sparse(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); + test_stream_sparse(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 2eeba46f2869..b5e17991f63d 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -471,7 +471,7 @@ def test_classifier_custom_objective(output, task, cluster): assert_eq(p1_proba, p1_proba_local) -def test_machines_to_worker_map_unparseable_host_names(): +def test_machines_to_worker_map_unparsable_host_names(): workers = {"0.0.0.1:80": {}, "0.0.0.2:80": {}} machines = "0.0.0.1:80,0.0.0.2:80" with pytest.raises(ValueError, match="Could not parse host name from worker address '0.0.0.1:80'"): diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 9ae471e7f4b9..cb2e893c9612 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -660,7 +660,7 @@ def test_ranking_prediction_early_stopping(): # Simulates position bias for a given ranking dataset. -# The ouput dataset is identical to the input one with the exception for the relevance labels. +# The output dataset is identical to the input one with the exception for the relevance labels. # The new labels are generated according to an instance of a cascade user model: # for each query, the user is simulated to be traversing the list of documents ranked by a baseline ranker # (in our example it is simply the ordering by some feature correlated with relevance, e.g., 34) From ea04c66c86e31ebf68ec151d75c14fbdfb6ea681 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Tue, 3 Dec 2024 15:08:10 +0300 Subject: [PATCH 15/27] [docs] update installation guide (#6696) * Update Installation-Guide.rst * Update script.js * replace all Ninja sections with one paragraph --------- Co-authored-by: shiyu1994 --- docs/Installation-Guide.rst | 621 ++++++++++++++++++++++-------------- docs/_static/js/script.js | 4 +- 2 files changed, 389 insertions(+), 236 deletions(-) diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 41b84f9b82c2..1e28d037388d 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -1,17 +1,30 @@ Installation Guide ================== -This is a guide for building the LightGBM Command Line Interface (CLI). 
If you want to build the Python-package or R-package please refer to `Python-package`_ and `R-package`_ folders respectively.
-
 All instructions below are aimed at compiling the 64-bit version of LightGBM.
 It is worth compiling the 32-bit version only in very rare special cases involving environmental limitations.
 The 32-bit version is slow and untested, so use it at your own risk and don't forget to adjust some of the commands below when installing.

+By default, instructions below will use **VS Build Tools** or **make** tool to compile the code.
+It is possible to use the `Ninja`_ tool instead of make on all platforms, but VS Build Tools cannot be replaced with Ninja.
+You can add ``-G Ninja`` to CMake flags to use Ninja.
+
+By default, instructions below will produce a shared library file and an executable file with a command-line interface.
+You can add ``-DBUILD_CLI=OFF`` to CMake flags to disable the executable compilation.
+
 If you need to build a static library instead of a shared one, you can add ``-DBUILD_STATIC_LIB=ON`` to CMake flags.

+By default, instructions below will place header files into a system-wide folder.
+You can add ``-DINSTALL_HEADERS=OFF`` to CMake flags to disable headers installation.
+
+By default, on macOS, CMake looks into standard Homebrew folders to find dependencies (e.g. OpenMP).
+You can add ``-DUSE_HOMEBREW_FALLBACK=OFF`` to CMake flags to disable this behaviour.
+
 Users who want to perform benchmarking can make LightGBM output time costs for different internal routines by adding ``-DUSE_TIMETAG=ON`` to CMake flags.

-It is possible to build LightGBM in debug mode. In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM.
+It is possible to build LightGBM in debug mode.
+In this mode all compiler optimizations are disabled and LightGBM performs more checks internally.
+To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM.

 .. _sanitizers:

@@ -30,7 +43,7 @@ It is very useful to build `C++ unit tests <#build-c-unit-tests>`__ with sanitiz

 .. _nightly-builds:

-You can also download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|.
+You can download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|.

 .. contents:: **Contents**
     :depth: 1

@@ -40,12 +53,10 @@ You can also download the artifacts of the latest successful build on master bra
 Windows
 ~~~~~~~

-On Windows LightGBM can be built using
+On Windows, LightGBM can be built using

 - **Visual Studio**;
-
 - **CMake** and **VS Build Tools**;
-
 - **CMake** and **MinGW**.

 Visual Studio (or VS Build Tools)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 With GUI
 ********

-1. Install `Visual Studio`_ (2015 or newer).
+1. Install `Visual Studio`_.

 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it.

-3. Go to ``LightGBM-master/windows`` folder.
+3. Go to ``LightGBM-complete_source_code_zip/windows`` folder.

4. 
Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ***************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -98,7 +110,7 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended that you use **Visual Studio** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -106,9 +118,17 @@ It is recommended that you use **Visual Studio** since it has better multithread Linux ~~~~~ -On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. +On Linux, LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -1. Install `CMake`_. +gcc +^^^ + +1. Install `CMake`_ and **gcc**. 2. Run the following commands: @@ -119,53 +139,69 @@ On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. cmake -B build -S . cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +^^^^^ -Using ``Ninja`` -^^^^^^^^^^^^^^^ +1. Install `CMake`_, **Clang** and **OpenMP**. -On Linux, LightGBM can also be built with `Ninja `__ instead of ``make``. +2. Run the following commands: -.. code:: sh + .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -G 'Ninja' - cmake --build build -j2 + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . 
+ cmake --build build -j4 macOS ~~~~~ -On macOS LightGBM can be installed using **Homebrew**, or can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, LightGBM can be installed using -Apple Clang -^^^^^^^^^^^ +- **Homebrew**; +- **MacPorts**; + +or can be built using -Only **Apple Clang** version 8.1 or higher is supported. +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. Install Using ``Homebrew`` -************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: sh brew install lightgbm -Build from GitHub -***************** +Refer to https://formulae.brew.sh/formula/lightgbm for more details. -1. Install `CMake`_ : +Install Using ``MacPorts`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ - .. code:: sh +.. code:: sh - brew install cmake + sudo port install LightGBM + +Refer to https://ports.macports.org/port/LightGBM for more details. + +**Note**: Port for LightGBM is not maintained by LightGBM's maintainers. -2. Install **OpenMP**: +Build from GitHub +^^^^^^^^^^^^^^^^^ + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. + +Apple Clang +*********** + +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake libomp -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -175,21 +211,15 @@ Build from GitHub cmake --build build -j4 gcc -^^^ - -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake +*** -2. Install **gcc**: +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake gcc -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -213,12 +243,10 @@ You can build LightGBM without OpenMP support but it is **strongly not recommend Windows ^^^^^^^ -On Windows a version of LightGBM without OpenMP support can be built using +On Windows, a version of LightGBM without OpenMP support can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) @@ -227,26 +255,27 @@ Visual Studio (or VS Build Tools) With GUI -------- -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. Open ``LightGBM.sln`` file with **Visual Studio**. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library. -5. Go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. +5. Go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. -6. Get back to the project's main screen, then choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +6. Get back to the project's main screen and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. 
+ If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ----------------- -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -275,14 +304,36 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. Linux ^^^^^ -On Linux a version of LightGBM without OpenMP support can be built using **CMake** and **gcc** or **Clang**. +On Linux, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_ and **gcc**. + +2. Run the following commands: + + .. code:: sh -1. Install `CMake`_. + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_OPENMP=OFF + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_ and **Clang**. 2. Run the following commands: @@ -290,20 +341,24 @@ On Linux a version of LightGBM without OpenMP support can be built using **CMake git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine cmake -B build -S . -DUSE_OPENMP=OFF cmake --build build -j4 macOS ^^^^^ -On macOS a version of LightGBM without OpenMP support can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_: .. code:: sh @@ -321,19 +376,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install cmake + brew install cmake gcc -2. Install **gcc**: - - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -354,35 +403,36 @@ If you need to run a distributed learning application with high performance comm Windows ^^^^^^^ -On Windows an MPI version of LightGBM can be built using +On Windows, an MPI version of LightGBM can be built using - **MS MPI** and **Visual Studio**; - - **MS MPI**, **CMake** and **VS Build Tools**. 
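For context on what this MPI build is used for: the MPI-enabled CLI is started through an MPI runner for distributed learning rather than invoked directly. A minimal sketch of such a launch, assuming an ``./lightgbm`` executable built with ``-DUSE_MPI=ON`` and a hypothetical ``train.conf`` config file (not shipped with LightGBM) that sets ``tree_learner`` to one of the parallel learners:

.. code:: sh

    # launch 2 MPI processes of an MPI-enabled CLI built with -DUSE_MPI=ON;
    # train.conf is an assumed LightGBM config file that sets a parallel tree_learner
    mpiexec -n 2 ./lightgbm config=train.conf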
+**Note**: Building the MPI version with **MinGW** is not supported because MinGW lacks an MPI library.
+
 With GUI
 ********

 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed.

-2. Install `Visual Studio`_ (2015 or newer).
+2. Install `Visual Studio`_.

 3. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it.

-4. Go to ``LightGBM-master/windows`` folder.
+4. Go to ``LightGBM-complete_source_code_zip/windows`` folder.

-5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``.
+5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``.

-   If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine.
+   If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine.

-The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release_mpi`` folder.
+The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release_mpi`` folder.

 From Command Line
 *****************

 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed.

-2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed).
+2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed).

 3. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      cmake -B build -S . -A x64 -DUSE_MPI=ON
      cmake --build build --target ALL_BUILD --config Release

 The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.

-**Note**: Building MPI version by **MinGW** is not supported due to the miss of MPI library in it.

 Linux
 ^^^^^

-On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **gcc** or **Clang**.
+On Linux, an MPI version of LightGBM can be built using

-1. Install `Open MPI`_.
+- **CMake**, **gcc** and **Open MPI**;
+- **CMake**, **Clang** and **Open MPI**.

-2. Install `CMake`_.
+After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder.

-3. Run the following commands:
+gcc
+***
+
+1. Install `CMake`_, **gcc** and `Open MPI`_.
+
+2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      cmake -B build -S . -DUSE_MPI=ON
      cmake --build build -j4

-**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
-
-macOS
-^^^^^
+Clang
+*****

-On macOS an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **Apple Clang** or **gcc**.
+1. Install `CMake`_, **Clang**, **OpenMP** and `Open MPI`_.

-Apple Clang
-***********
+2. Run the following commands:

-Only **Apple Clang** version 8.1 or higher is supported.
+  .. code:: sh

-1. Install `CMake`_ :
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . -DUSE_MPI=ON
+    cmake --build build -j4

-   .. 
code:: sh +macOS +^^^^^ - brew install cmake +On macOS, an MPI version of LightGBM can be built using -2. Install **OpenMP**: +- **CMake**, **Open MPI** and **Apple Clang**; +- **CMake**, **Open MPI** and **gcc**. - .. code:: sh +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. - brew install libomp +Apple Clang +*********** -3. Install **Open MPI**: +1. Install `CMake`_, **OpenMP** and `Open MPI`_: .. code:: sh - brew install open-mpi + brew install cmake libomp open-mpi -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -457,25 +515,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake - -2. Install **gcc**: +1. Install `CMake`_, `Open MPI`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake open-mpi gcc -3. Install **Open MPI**: - - .. code:: sh - - brew install open-mpi - -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -488,48 +534,19 @@ gcc Build GPU Version ~~~~~~~~~~~~~~~~~ -Linux -^^^^^ - -On Linux a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **gcc** or **Clang**. - -The following dependencies should be installed before compilation: - -- **OpenCL** 1.2 headers and libraries, which is usually provided by GPU manufacture. - - The generic OpenCL ICD packages (for example, Debian package ``ocl-icd-libopencl1`` and ``ocl-icd-opencl-dev``) can also be used. - -- **libboost** 1.56 or later (1.61 or later is recommended). - - We use Boost.Compute as the interface to GPU, which is part of the Boost library since version 1.61. However, since we include the source code of Boost.Compute as a submodule, we only require the host has Boost 1.56 or later installed. We also use Boost.Align for memory allocation. Boost.Compute requires Boost.System and Boost.Filesystem to store offline kernel cache. - - The following Debian packages should provide necessary Boost libraries: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``. - -- **CMake** - -To build LightGBM GPU version, run the following commands: - -.. code:: sh - - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_GPU=1 - # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ - cmake --build build - -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - Windows ^^^^^^^ -On Windows a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **OpenCL**, **Boost**, **CMake** and **VS Build Tools**; +- **OpenCL**, **Boost**, **CMake** and **MinGW**. If you use **MinGW**, the build procedure is similar to the build on Linux. Following procedure is for the **MSVC** (Microsoft Visual C++) build. -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is installed). +1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is installed).

 2. Install **OpenCL** for Windows. The installation depends on the brand (NVIDIA, AMD, Intel) of your GPU card.

@@ -559,13 +576,68 @@ Following procedure is for the **MSVC** (Microsoft Visual C++) build.

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
+     cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
      # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
-     # cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include"
+     # cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include"
      cmake --build build --target ALL_BUILD --config Release

 **Note**: ``C:/local/boost_1_63_0`` and ``C:/local/boost_1_63_0/lib64-msvc-14.0`` are locations of your **Boost** binaries (assuming you've downloaded 1.63.0 version for Visual Studio 2015).

+The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.
+
+Linux
+^^^^^
+
+On Linux, a GPU version of LightGBM (``device_type=gpu``) can be built using
+
+- **CMake**, **OpenCL**, **Boost** and **gcc**;
+- **CMake**, **OpenCL**, **Boost** and **Clang**.
+
+**OpenCL** headers and libraries are usually provided by the GPU manufacturer.
+The generic OpenCL ICD packages (for example, Debian packages ``ocl-icd-libopencl1``, ``ocl-icd-opencl-dev``, ``pocl-opencl-icd``) can also be used.
+
+Required **Boost** libraries (Boost.Align, Boost.System, Boost.Filesystem, Boost.Chrono) should be provided by the following Debian packages: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``, ``libboost-chrono-dev``.
+
+After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder.
+
+gcc
+***
+
+1. Install `CMake`_, **gcc**, **OpenCL** and **Boost**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    cmake -B build -S . -DUSE_GPU=ON
+    # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
+    # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/
+    cmake --build build -j4
+
+Clang
+*****
+
+1. Install `CMake`_, **Clang**, **OpenMP**, **OpenCL** and **Boost**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . 
-DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +macOS +^^^^^ + +The GPU version is not supported on macOS. + Docker ^^^^^^ @@ -574,60 +646,84 @@ Refer to `GPU Docker folder `__ of LightGBM (``device_type=gpu``) is based on OpenCL. +The `original GPU version <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL. -The CUDA-based build (``device_type=cuda``) is a separate implementation. +The CUDA-based version (``device_type=cuda``) is a separate implementation. Use this version in Linux environments with an NVIDIA GPU with compute capability 6.0 or higher. +Windows +^^^^^^^ + +The CUDA version is not supported on Windows. +Use the `GPU version <#build-gpu-version>`__ (``device_type=gpu``) for GPU acceleration on Windows. + Linux ^^^^^ -On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and **gcc** or **Clang**. +On Linux, a CUDA version of LightGBM can be built using -The following dependencies should be installed before compilation: +- **CMake**, **gcc** and **CUDA**; +- **CMake**, **Clang** and **CUDA**. -- **CUDA** 11.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers. +Please refer to `this detailed guide`_ for **CUDA** libraries installation. -- **CMake** +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -To build LightGBM CUDA version, run the following commands: +gcc +*** -.. code:: sh +1. Install `CMake`_, **gcc** and **CUDA**. + +2. Run the following commands: - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_CUDA=1 - cmake --build build -j4 + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP** and **CUDA**. + +2. Run the following commands: -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 macOS ^^^^^ The CUDA version is not supported on macOS. -Windows -^^^^^^^ - -The CUDA version is not supported on Windows. -Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows. - Build Java Wrapper ~~~~~~~~~~~~~~~~~~ Using the following instructions you can generate a JAR file containing the LightGBM `C API <./Development-Guide.rst#c-api>`__ wrapped by **SWIG**. +After compilation the ``.jar`` file will be in ``LightGBM/build`` folder. + Windows ^^^^^^^ -On Windows a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a Java wrapper of LightGBM can be built using + +- **Java**, **SWIG**, **CMake** and **VS Build Tools**; +- **Java**, **SWIG**, **CMake** and **MinGW**. VS Build Tools ************** -1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -638,14 +734,12 @@ VS Build Tools cmake -B build -S . -A x64 -DUSE_SWIG=ON cmake --build build --target ALL_BUILD --config Release -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/Release`` folder. - MinGW-w64 ********* 1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -656,9 +750,7 @@ MinGW-w64 cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON cmake --build build -j4 -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/`` folder. - -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended to use **VS Build Tools (Visual Studio)** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -666,9 +758,15 @@ It is recommended to use **VS Build Tools (Visual Studio)** since it has better Linux ^^^^^ -On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **gcc** or **Clang**. +On Linux, a Java wrapper of LightGBM can be built using + +- **CMake**, **gcc**, **Java** and **SWIG**; +- **CMake**, **Clang**, **Java** and **SWIG**. + +gcc +*** -1. Install `CMake`_, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +1. Install `CMake`_, **gcc**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 2. Run the following commands: @@ -679,34 +777,40 @@ On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMa cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +***** -macOS -^^^^^ +1. Install `CMake`_, **Clang**, **OpenMP**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). -On macOS a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **Apple Clang** or **gcc**. +2. Run the following commands: -First, install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). -Then, either follow the **Apple Clang** or **gcc** installation instructions below. + .. 
code:: sh -Apple Clang -*********** + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_SWIG=ON + cmake --build build -j4 -Only **Apple Clang** version 8.1 or higher is supported. +macOS +^^^^^ -1. Install `CMake`_ : +On macOS, a Java wrapper of LightGBM can be built using - .. code:: sh +- **CMake**, **Java**, **SWIG** and **Apple Clang**; +- **CMake**, **Java**, **SWIG** and **gcc**. - brew install cmake +Apple Clang +*********** -2. Install **OpenMP**: +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake openjdk swig libomp + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -718,19 +822,14 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: - - .. code:: sh - - brew install gcc + brew install cmake openjdk swig gcc + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -740,15 +839,31 @@ gcc cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 +Build Python-package +~~~~~~~~~~~~~~~~~~~~ + +Refer to `Python-package folder `__. + +Build R-package +~~~~~~~~~~~~~~~ + +Refer to `R-package folder `__. + Build C++ Unit Tests ~~~~~~~~~~~~~~~~~~~~ Windows ^^^^^^^ -On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Build Tools**. +On Windows, C++ unit tests of LightGBM can be built using + +- **CMake** and **VS Build Tools**; +- **CMake** and **MinGW**. + +VS Build Tools +************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -756,17 +871,43 @@ On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Bui git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm --config Debug The ``.exe`` file will be in ``LightGBM/Debug`` folder. +MinGW-w64 +********* + +1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. + +2. Run the following commands: + + .. code:: console + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +The ``.exe`` file will be in ``LightGBM/`` folder. + +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. + Linux ^^^^^ -On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** or **Clang**. 
+On Linux, C++ unit tests of LightGBM can be built using
+
+- **CMake** and **gcc**;
+- **CMake** and **Clang**.
+
+After compilation the executable file will be in ``LightGBM/`` folder.
+
+gcc
+***

-1. Install `CMake`_.
+1. Install `CMake`_ and **gcc**.

 2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
+     cmake --build build --target testlightgbm -j4
+
+Clang
+*****
+
+1. Install `CMake`_, **Clang** and **OpenMP**.
+
+2. Run the following commands:
+
+  .. code:: sh
+
+    git clone --recursive https://github.com/microsoft/LightGBM
+    cd LightGBM
+    export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine
+    cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 macOS
 ^^^^^

-On macOS a C++ unit tests of LightGBM can be built using **CMake** and **Apple Clang** or **gcc**.
+On macOS, C++ unit tests of LightGBM can be built using
+
+- **CMake** and **Apple Clang**;
+- **CMake** and **gcc**.
+
+After compilation the executable file will be in ``LightGBM/`` folder.

 Apple Clang
 ***********

-Only **Apple Clang** version 8.1 or higher is supported.
-
-1. Install `CMake`_ :
+1. Install `CMake`_ and **OpenMP**:

    .. code:: sh

-     brew install cmake
+     brew install cmake libomp

 2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 gcc
 ***

-1. Install `CMake`_ :
+1. Install `CMake`_ and **gcc**:

    .. code:: sh

-     brew install cmake
-
-2. Install **gcc**:
-
-   .. code:: sh
-
-     brew install gcc
-
-3. Run the following commands:
+2. Run the following commands:

    .. code:: sh

      git clone --recursive https://github.com/microsoft/LightGBM
      cd LightGBM
      export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
-     cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
+     cmake -B build -S . -DBUILD_CPP_TEST=ON
      cmake --build build --target testlightgbm -j4

 .. |download artifacts| image:: ./_static/images/artifacts-not-available.svg
    :target: https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html

-.. _Python-package: https://github.com/microsoft/LightGBM/tree/master/python-package
-
-.. _R-package: https://github.com/microsoft/LightGBM/tree/master/R-package
-
 .. _Visual Studio: https://visualstudio.microsoft.com/downloads/

 .. _Git for Windows: https://git-scm.com/download/win

 .. _this detailed guide: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html

 .. _following docs: https://github.com/google/sanitizers/wiki
+
+.. 
_Ninja: https://ninja-build.org diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index c4717b8a0ee5..c6d21713fe5c 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -15,7 +15,7 @@ $(() => { /* Collapse specified sections in the installation guide */ if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { $( - '', + '', ).appendTo("body"); const collapsible = [ "#build-threadless-version-not-recommended", @@ -23,6 +23,8 @@ $(() => { "#build-gpu-version", "#build-cuda-version", "#build-java-wrapper", + "#build-python-package", + "#build-r-package", "#build-c-unit-tests", ]; $.each(collapsible, (_, val) => { From 6e0b0a8be44b14ade10737288a26aa361a00a18e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 3 Dec 2024 20:05:31 -0600 Subject: [PATCH 16/27] [python-package] simplify scikit-learn 1.6+ tags support (#6735) --- python-package/lightgbm/compat.py | 10 ---------- python-package/lightgbm/sklearn.py | 15 +++++---------- tests/python_package_test/test_sklearn.py | 6 ++++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 0b9444b0ecbf..96dee6522572 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -14,14 +14,6 @@ from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import assert_all_finite, check_array, check_X_y - # sklearn.utils Tags types can be imported unconditionally once - # lightgbm's minimum scikit-learn version is 1.6 or higher - try: - from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags - from sklearn.utils import RegressorTags as _sklearn_RegressorTags - except ImportError: - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None try: from sklearn.exceptions import NotFittedError from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold @@ -148,8 +140,6 @@ class _LGBMRegressorBase: # type: ignore _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None _LGBMValidateData = None - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None _sklearn_version = None # additional scikit-learn imports only for type hints diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index d730b66c3556..108ef1e14498 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -40,8 +40,6 @@ _LGBMModelBase, _LGBMRegressorBase, _LGBMValidateData, - _sklearn_ClassifierTags, - _sklearn_RegressorTags, _sklearn_version, dt_DataTable, pd_DataFrame, @@ -726,7 +724,7 @@ def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: # take whatever tags are provided by BaseEstimator, then modify # them with LightGBM-specific values return self._update_sklearn_tags_from_dict( - tags=_LGBMModelBase.__sklearn_tags__(self), + tags=super().__sklearn_tags__(), tags_dict=self._more_tags(), ) @@ -1298,10 +1296,7 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "regressor" - tags.regressor_tags = _sklearn_RegressorTags(multi_label=False) - return tags + return super().__sklearn_tags__() def fit( # type: ignore[override] self, @@ -1360,9 +1355,9 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "classifier" - 
tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False) + tags = super().__sklearn_tags__() + tags.classifier_tags.multi_class = True + tags.classifier_tags.multi_label = False return tags def fit( # type: ignore[override] diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index d187e9df5a9f..1cdd047f1857 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -1488,6 +1488,12 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True + if estimator_class is lgb.LGBMClassifier: + assert sklearn_tags.estimator_type == "classifier" + assert sklearn_tags.classifier_tags.multi_class is True + assert sklearn_tags.classifier_tags.multi_label is False + elif estimator_class is lgb.LGBMRegressor: + assert sklearn_tags.estimator_type == "regressor" @pytest.mark.parametrize("task", all_tasks) From d4d6c87db02a146ac6dc04b00f538e02a3b22250 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 5 Dec 2024 10:24:35 -0600 Subject: [PATCH 17/27] [c++] include wherever uint8_t is used (#6736) --- include/LightGBM/bin.h | 1 + include/LightGBM/cuda/cuda_column_data.hpp | 1 + include/LightGBM/cuda/cuda_row_data.hpp | 1 + include/LightGBM/dataset.h | 1 + include/LightGBM/feature_group.h | 1 + include/LightGBM/train_share_states.h | 1 + include/LightGBM/tree.h | 1 + src/c_api.cpp | 1 + src/io/cuda/cuda_column_data.cpp | 2 ++ src/io/json11.cpp | 1 + 10 files changed, 11 insertions(+) diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index a33fcfa9c45c..5826f2387102 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/cuda/cuda_column_data.hpp b/include/LightGBM/cuda/cuda_column_data.hpp index 314a178859c6..8875cd151d7d 100644 --- a/include/LightGBM/cuda/cuda_column_data.hpp +++ b/include/LightGBM/cuda/cuda_column_data.hpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace LightGBM { diff --git a/include/LightGBM/cuda/cuda_row_data.hpp b/include/LightGBM/cuda/cuda_row_data.hpp index 1d4cb2f73b1e..85da72bc083d 100644 --- a/include/LightGBM/cuda/cuda_row_data.hpp +++ b/include/LightGBM/cuda/cuda_row_data.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #define COPY_SUBROW_BLOCK_SIZE_ROW_DATA (1024) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index ef214b7cd89d..c2a4b62296f2 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index f13a5fff966f..bcc0388ba507 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/train_share_states.h b/include/LightGBM/train_share_states.h index f102668edf70..e4f4e4afea5f 100644 --- a/include/LightGBM/train_share_states.h +++ b/include/LightGBM/train_share_states.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index c28ddd140c48..bc5af621e402 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -8,6 +8,7 @@ #include #include 
+#include #include #include #include diff --git a/src/c_api.cpp b/src/c_api.cpp index 98748bc9ff2f..cf6577ad5e2c 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/src/io/cuda/cuda_column_data.cpp b/src/io/cuda/cuda_column_data.cpp index eb0938c01225..415578847f07 100644 --- a/src/io/cuda/cuda_column_data.cpp +++ b/src/io/cuda/cuda_column_data.cpp @@ -7,6 +7,8 @@ #include +#include + namespace LightGBM { CUDAColumnData::CUDAColumnData(const data_size_t num_data, const int gpu_device_id) { diff --git a/src/io/json11.cpp b/src/io/json11.cpp index 32a9c9d718b7..acd09f9ecb12 100644 --- a/src/io/json11.cpp +++ b/src/io/json11.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include From 33764e131e3556a4fb5ee11901e91a03ad0c37b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Mon, 9 Dec 2024 16:06:19 -0600 Subject: [PATCH 18/27] [ci] set upper bound on dask (#6742) --- .ci/conda-envs/ci-core.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/conda-envs/ci-core.txt b/.ci/conda-envs/ci-core.txt index a0763580c7f3..46d20963ed98 100644 --- a/.ci/conda-envs/ci-core.txt +++ b/.ci/conda-envs/ci-core.txt @@ -18,7 +18,7 @@ # direct imports cffi>=1.16 -dask>=2023.5.0 +dask>=2023.5.0,<2024.12 joblib>=1.3.2 matplotlib-base>=3.7.3 numpy>=1.24.4 From ae76aad6a591ddd41723c12a3f236643bb0ba2c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Tue, 10 Dec 2024 03:11:52 -0600 Subject: [PATCH 19/27] [python-package] do not copy column-major numpy arrays when creating Dataset (#6721) * do not copy column-major numpy arrays when creating Dataset * fix logic * lint * code review * update test * move dataset test to basic * increase features * assert single layout --------- Co-authored-by: Nikita Titov --- python-package/lightgbm/basic.py | 28 ++++++++++++++----- tests/python_package_test/test_basic.py | 36 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 99a690f38993..1db55385af1b 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -188,6 +188,23 @@ def _get_sample_count(total_nrow: int, params: str) -> int: return sample_cnt.value +def _np2d_to_np1d(mat: np.ndarray) -> Tuple[np.ndarray, int]: + if mat.dtype in (np.float32, np.float64): + dtype = mat.dtype + else: + dtype = np.float32 + if mat.flags["F_CONTIGUOUS"]: + order = "F" + layout = _C_API_IS_COL_MAJOR + else: + order = "C" + layout = _C_API_IS_ROW_MAJOR + # ensure dtype and order, copies if either do not match + data = np.asarray(mat, dtype=dtype, order=order) + # flatten array without copying + return data.ravel(order=order), layout + + class _MissingType(Enum): NONE = "None" NAN = "NaN" @@ -684,7 +701,8 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va _C_API_DTYPE_INT32 = 2 _C_API_DTYPE_INT64 = 3 -"""Matrix is row major in Python""" +"""Macro definition of data order in matrix""" +_C_API_IS_COL_MAJOR = 0 _C_API_IS_ROW_MAJOR = 1 """Macro definition of prediction type in C API of LightGBM""" @@ -2297,11 +2315,7 @@ def __init_from_np2d( raise ValueError("Input numpy.ndarray must be 2 dimensional") self._handle = ctypes.c_void_p() - if mat.dtype == np.float32 or mat.dtype == np.float64: - data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype) - else: # change non-float data to float data, need to copy - data = 
From ae76aad6a591ddd41723c12a3f236643bb0ba2c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Tue, 10 Dec 2024 03:11:52 -0600
Subject: [PATCH 19/27] [python-package] do not copy column-major numpy arrays
 when creating Dataset (#6721)

* do not copy column-major numpy arrays when creating Dataset

* fix logic

* lint

* code review

* update test

* move dataset test to basic

* increase features

* assert single layout

---------

Co-authored-by: Nikita Titov
---
 python-package/lightgbm/basic.py        | 28 ++++++++++++++-----
 tests/python_package_test/test_basic.py | 36 +++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 99a690f38993..1db55385af1b 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -188,6 +188,23 @@ def _get_sample_count(total_nrow: int, params: str) -> int:
     return sample_cnt.value


+def _np2d_to_np1d(mat: np.ndarray) -> Tuple[np.ndarray, int]:
+    if mat.dtype in (np.float32, np.float64):
+        dtype = mat.dtype
+    else:
+        dtype = np.float32
+    if mat.flags["F_CONTIGUOUS"]:
+        order = "F"
+        layout = _C_API_IS_COL_MAJOR
+    else:
+        order = "C"
+        layout = _C_API_IS_ROW_MAJOR
+    # ensure dtype and order, copies if either do not match
+    data = np.asarray(mat, dtype=dtype, order=order)
+    # flatten array without copying
+    return data.ravel(order=order), layout
+
+
 class _MissingType(Enum):
     NONE = "None"
     NAN = "NaN"
@@ -684,7 +701,8 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
 _C_API_DTYPE_INT32 = 2
 _C_API_DTYPE_INT64 = 3

-"""Matrix is row major in Python"""
+"""Macro definition of data order in matrix"""
+_C_API_IS_COL_MAJOR = 0
 _C_API_IS_ROW_MAJOR = 1

 """Macro definition of prediction type in C API of LightGBM"""
@@ -2297,11 +2315,7 @@ def __init_from_np2d(
             raise ValueError("Input numpy.ndarray must be 2 dimensional")

         self._handle = ctypes.c_void_p()
-        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
-        else:  # change non-float data to float data, need to copy
-            data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
-
+        data, layout = _np2d_to_np1d(mat)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         _safe_call(
             _LIB.LGBM_DatasetCreateFromMat(
@@ -2309,7 +2323,7 @@ def __init_from_np2d(
                 ctypes.c_int(type_ptr_data),
                 ctypes.c_int32(mat.shape[0]),
                 ctypes.c_int32(mat.shape[1]),
-                ctypes.c_int(_C_API_IS_ROW_MAJOR),
+                ctypes.c_int(layout),
                 _c_str(params_str),
                 ref_dataset,
                 ctypes.byref(self._handle),
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 0dfe3e47fa11..bdd4d3f58b80 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -947,3 +947,39 @@ def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(c
         "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
     )
     assert expected_warning in capsys.readouterr().out
+
+
+@pytest.mark.parametrize("order", ["C", "F"])
+@pytest.mark.parametrize("dtype", ["float32", "int64"])
+def test_no_copy_in_dataset_from_numpy_2d(rng, order, dtype):
+    X = rng.random(size=(100, 3))
+    X = np.require(X, dtype=dtype, requirements=order)
+    X1d, layout = lgb.basic._np2d_to_np1d(X)
+    if order == "F":
+        assert layout == lgb.basic._C_API_IS_COL_MAJOR
+    else:
+        assert layout == lgb.basic._C_API_IS_ROW_MAJOR
+    if dtype == "float32":
+        assert np.shares_memory(X, X1d)
+    else:
+        # makes a copy
+        assert not np.shares_memory(X, X1d)
+
+
+def test_equal_datasets_from_row_major_and_col_major_data(tmp_path):
+    # row-major dataset
+    X_row, y = make_blobs(n_samples=1_000, n_features=3, centers=2)
+    assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"]
+    ds_row = lgb.Dataset(X_row, y)
+    ds_row_path = tmp_path / "ds_row.txt"
+    ds_row._dump_text(ds_row_path)
+
+    # col-major dataset
+    X_col = np.asfortranarray(X_row)
+    assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"]
+    ds_col = lgb.Dataset(X_col, y)
+    ds_col_path = tmp_path / "ds_col.txt"
+    ds_col._dump_text(ds_col_path)
+
+    # check datasets are equal
+    assert filecmp.cmp(ds_row_path, ds_col_path)
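The `_np2d_to_np1d` helper in the patch above works because, when an array's memory layout already matches the requested order, both `np.asarray` and `ravel` return views rather than copies. A quick standalone illustration of that property, using plain NumPy and independent of LightGBM:

import numpy as np

rng = np.random.default_rng(0)
# force a column-major (Fortran-ordered) matrix
mat = np.asfortranarray(rng.random((100, 3)))

# matching order and dtype: asarray and ravel are no-ops memory-wise
flat = np.asarray(mat, dtype=np.float64, order="F").ravel(order="F")
assert np.shares_memory(mat, flat)

# mismatched order (or dtype) forces a copy
flat_c = np.asarray(mat, dtype=np.float64, order="C").ravel(order="C")
assert not np.shares_memory(mat, flat_c)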
From 186c7cd47a72c080ccfccdf799b0fbe8da2ff53a Mon Sep 17 00:00:00 2001
From: Murphy Liang
Date: Wed, 11 Dec 2024 12:22:38 +0800
Subject: [PATCH 20/27] [c++] fix parallel_tree_learner_split_info (#6738)

Co-authored-by: Nikita Titov
Co-authored-by: shiyu1994
---
 src/treelearner/split_info.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/treelearner/split_info.hpp b/src/treelearner/split_info.hpp
index 234105eb9a34..8d33a6a76854 100644
--- a/src/treelearner/split_info.hpp
+++ b/src/treelearner/split_info.hpp
@@ -53,7 +53,7 @@ struct SplitInfo {
   bool default_left = true;
   int8_t monotone_type = 0;
   inline static int Size(int max_cat_threshold) {
-    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t);
+    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t) + sizeof(int64_t)*2;
   }

   inline void CopyTo(char* buffer) const {

From 53e0ddf7cd6eb281e3bec6273b19ff541c69bfa6 Mon Sep 17 00:00:00 2001
From: Scott Moser
Date: Wed, 11 Dec 2024 22:40:37 -0500
Subject: [PATCH 21/27] [R-package] Avoid bashisms (non-POSIX code) in
 R-package/configure (#6746)

---
 R-package/configure    | 8 ++++----
 R-package/configure.ac | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R-package/configure b/R-package/configure
index 11d691674f69..56a1fcc49105 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1789,7 +1789,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mmprefetch}" >&5
 printf "%s\n" "${ac_mmprefetch}" >&6; }
 if test "${ac_mmprefetch}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_PREFETCH=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1"
 fi

 ############
@@ -1824,7 +1824,7 @@ ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftes
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_mm_malloc}" >&5
 printf "%s\n" "${ac_mm_malloc}" >&6; }
 if test "${ac_mm_malloc}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_MALLOC=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1"
 fi

 ##########
@@ -1850,11 +1850,11 @@ then
     # If Homebrew is found and libomp was installed with it, this code adds the necessary
     # flags for the compiler to find libomp headers and for the linker to find libomp.dylib.
     HOMEBREW_LIBOMP_PREFIX=""
-    if command -v brew &> /dev/null; then
+    if command -v brew >/dev/null 2>&1; then
         ac_brew_openmp=no
         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP was installed via Homebrew" >&5
 printf %s "checking whether OpenMP was installed via Homebrew... " >&6; }
-        brew --prefix libomp &>/dev/null && ac_brew_openmp=yes
+        brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes
         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_brew_openmp}" >&5
 printf "%s\n" "${ac_brew_openmp}" >&6; }
         if test "${ac_brew_openmp}" = yes; then
diff --git a/R-package/configure.ac b/R-package/configure.ac
index dad365be691c..d0f0462aef60 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -60,7 +60,7 @@ AC_LANG_CONFTEST(
 ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mmprefetch=yes
 AC_MSG_RESULT([${ac_mmprefetch}])
 if test "${ac_mmprefetch}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_PREFETCH=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_PREFETCH=1"
 fi

 ############
@@ -86,7 +86,7 @@ AC_LANG_CONFTEST(
 ${CXX} ${CPPFLAGS} ${CXXFLAGS} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_mm_malloc=yes
 AC_MSG_RESULT([${ac_mm_malloc}])
 if test "${ac_mm_malloc}" = yes; then
-    LGB_CPPFLAGS+=" -DMM_MALLOC=1"
+    LGB_CPPFLAGS="${LGB_CPPFLAGS} -DMM_MALLOC=1"
 fi

 ##########
@@ -112,10 +112,10 @@ then
     # If Homebrew is found and libomp was installed with it, this code adds the necessary
     # flags for the compiler to find libomp headers and for the linker to find libomp.dylib.
     HOMEBREW_LIBOMP_PREFIX=""
-    if command -v brew &> /dev/null; then
+    if command -v brew >/dev/null 2>&1; then
         ac_brew_openmp=no
         AC_MSG_CHECKING([whether OpenMP was installed via Homebrew])
-        brew --prefix libomp &>/dev/null && ac_brew_openmp=yes
+        brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes
         AC_MSG_RESULT([${ac_brew_openmp}])
         if test "${ac_brew_openmp}" = yes; then
             HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`

From b33a12ea3883f306388e69f12ceb421b1ee7ec29 Mon Sep 17 00:00:00 2001
From: shiyu1994
Date: Sat, 14 Dec 2024 12:12:17 +0800
Subject: [PATCH 22/27] [fix] resolve potential attack in linker connection
 building (#6752)

---
 src/network/linkers_socket.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/network/linkers_socket.cpp b/src/network/linkers_socket.cpp
index 69e92a81b8eb..91d618bf1a2d 100644
--- a/src/network/linkers_socket.cpp
+++ b/src/network/linkers_socket.cpp
@@ -157,6 +157,9 @@ void Linkers::ListenThread(int incoming_cnt) {
     }
     int* ptr_in_rank = reinterpret_cast<int*>(buffer);
     int in_rank = *ptr_in_rank;
+    if (in_rank < 0 || in_rank >= num_machines_) {
+      Log::Fatal("Invalid rank %d found during initialization of linkers. The world size is %d.", in_rank, num_machines_);
+    }
     // add new socket
     SetLinker(in_rank, handler);
     ++connected_cnt;
From 1090a93b39e16f49621aa6824cd09d4390c3678a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Sat, 14 Dec 2024 23:45:10 -0600
Subject: [PATCH 23/27] [python-package] do not copy column-major numpy arrays
 when predicting (#6751)

---
 python-package/lightgbm/basic.py         |  7 ++-----
 tests/python_package_test/test_engine.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 1db55385af1b..0f2e3697f6ec 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -1291,10 +1291,7 @@ def __inner_predict_np2d(
         predict_type: int,
         preds: Optional[np.ndarray],
     ) -> Tuple[np.ndarray, int]:
-        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
-        else:  # change non-float data to float data, need to copy
-            data = np.array(mat.reshape(mat.size), dtype=np.float32)
+        data, layout = _np2d_to_np1d(mat)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         n_preds = self.__get_num_preds(
             start_iteration=start_iteration,
@@ -1314,7 +1311,7 @@ def __inner_predict_np2d(
             ctypes.c_int(type_ptr_data),
             ctypes.c_int32(mat.shape[0]),
             ctypes.c_int32(mat.shape[1]),
-            ctypes.c_int(_C_API_IS_ROW_MAJOR),
+            ctypes.c_int(layout),
             ctypes.c_int(predict_type),
             ctypes.c_int(start_iteration),
             ctypes.c_int(num_iteration),
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index cb2e893c9612..05afddb77c77 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -4611,3 +4611,18 @@ def test_bagging_by_query_in_lambdarank():
     ndcg_score_no_bagging_by_query = gbm_no_bagging_by_query.best_score["valid_0"]["ndcg@5"]
     assert ndcg_score_bagging_by_query >= ndcg_score - 0.1
     assert ndcg_score_no_bagging_by_query >= ndcg_score - 0.1
+
+
+def test_equal_predict_from_row_major_and_col_major_data():
+    X_row, y = make_synthetic_regression()
+    assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"]
+    ds = lgb.Dataset(X_row, y)
+    params = {"num_leaves": 8, "verbose": -1}
+    bst = lgb.train(params, ds, num_boost_round=5)
+    preds_row = bst.predict(X_row)
+
+    X_col = np.asfortranarray(X_row)
+    assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"]
+    preds_col = bst.predict(X_col)
+
+    np.testing.assert_allclose(preds_row, preds_col)
From c2f3807c73266b246a9aa74c670e4ab2940cde3e Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Sun, 15 Dec 2024 16:09:36 +0300
Subject: [PATCH 24/27] [ci] use Ruff linter instead of isort (#6755)

* Update append-comment.sh

* Update static_analysis.yml

* Update static_analysis.yml

* Update basic.py

* Update basic.py

* Update .pre-commit-config.yaml

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update pyproject.toml

* Update pyproject.toml

* Update pyproject.toml

* Update pyproject.toml

* Update interactive_plot_example.ipynb

* Update pyproject.toml

* Update append-comment.sh

* Update basic.py

* Update basic.py

* Update pyproject.toml

* Update .pre-commit-config.yaml

* Update basic.py

* Update basic.py

* Update test_basic.R

* Update rank_objective.hpp

* Update histogram_16_64_256.cu

* Update static_analysis.yml

* ensure alphabetical order of rules

---
 .pre-commit-config.yaml                      | 10 ++------
 R-package/tests/testthat/test_basic.R        |  2 +-
 .../notebooks/interactive_plot_example.ipynb |  2 +-
 python-package/lightgbm/basic.py             |  8 +++----
 python-package/pyproject.toml                | 24 ++++++++-----------
 src/objective/rank_objective.hpp             |  2 +-
 .../kernels/histogram_16_64_256.cu           |  2 +-
 7 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b334db19b8e7..0edab8df1be6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,15 +17,9 @@ repos:
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-        name: isort (python)
-        args: ["--settings-path", "python-package/pyproject.toml"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.7.0
+    rev: v0.8.3
     hooks:
       # Run the linter.
       - id: ruff
@@ -40,7 +34,7 @@ repos:
     hooks:
       - id: shellcheck
   - repo: https://github.com/crate-ci/typos
-    rev: v1.23.2
+    rev: v1.28.3
     hooks:
       - id: typos
         args: ["--force-exclude"]
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 7310815c4a6d..06d35a146d66 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -2345,7 +2345,7 @@ test_that("early stopping works with lgb.cv()", {
     # never changes, its first iteration was the best oone
     expect_equal(bst$best_iter, 1L)

-    # best_score should be taken from the first metri
+    # best_score should be taken from the first metric
    expect_equal(bst$best_score, 0.2)

    # early stopping should have happened, since constant_metric was the first
diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb
index cc8efa2c187b..a8abdf325d9d 100644
--- a/examples/python-guide/notebooks/interactive_plot_example.ipynb
+++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb
@@ -30,7 +30,7 @@
     "try:\n",
     "    # To enable interactive mode you should install ipywidgets\n",
     "    # https://github.com/jupyter-widgets/ipywidgets\n",
-    "    from ipywidgets import interact, SelectMultiple\n",
+    "    from ipywidgets import SelectMultiple, interact\n",
     "\n",
     "    INTERACTIVE = True\n",
     "except ImportError:\n",
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 0f2e3697f6ec..e06290dc1c5f 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2504,13 +2504,13 @@ def _compare_params_for_warning(
     compare_result : bool
         Returns whether two dictionaries with params are equal.
     """
-    for k in other_params:
+    for k, v in other_params.items():
         if k not in ignore_keys:
-            if k not in params or params[k] != other_params[k]:
+            if k not in params or params[k] != v:
                 return False
-    for k in params:
+    for k, v in params.items():
         if k not in ignore_keys:
-            if k not in other_params or params[k] != other_params[k]:
+            if k not in other_params or v != other_params[k]:
                 return False
     return True
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index 19866e01202b..8fcc85814db5 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -84,17 +84,6 @@ minimum-version = "build-system.requires"

 # end:build-system

-[tool.isort]
-include_trailing_comma = true
-line_length = 120
-# "vertical hanging indent", to match what ruff-format does
-# ref: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html#3-vertical-hanging-indent
-multi_line_output = 3
-skip_glob = [
-    "*/external_libs/*",
-    "*/lightgbm-python/*",
-]
-
 [tool.mypy]
 disallow_untyped_defs = true
 exclude = 'build/*|compile/*|docs/*|examples/*|external_libs/*|lightgbm-python/*|tests/*'
@@ -140,7 +129,7 @@ ignore = [
     "PLR1714",
     # (pylint) Magic value used in comparison
     "PLR2004",
-    # (pylint) for loop veriable overwritten by assignment target
+    # (pylint) for loop variable overwritten by assignment target
     "PLW2901",
     # (pylint) use 'elif' instead of 'else' then 'if', to reduce indentation
     "PLR5501"
@@ -152,10 +141,12 @@ select = [
     "C4",
     # pydocstyle
     "D",
-    # pycodestyle
+    # pycodestyle (errors)
     "E",
     # pyflakes
     "F",
+    # isort
+    "I",
     # NumPy-specific rules
     "NPY",
     # pylint
     "PL",
     # flake8-simplify
     "SIM401",
     # flake8-print
     "T",
+    # pycodestyle (warnings)
+    "W",
 ]

 [tool.ruff.lint.per-file-ignores]
 "docs/conf.py" = [
-    # (flake8-bugbear) raise exceptions with "raise ... from errr"
+    # (flake8-bugbear) raise exceptions with "raise ... from err"
     "B904",
     # (flake8-print) flake8-print
     "T"
 ]
@@ -196,3 +189,6 @@

 [tool.ruff.lint.pydocstyle]
 convention = "numpy"
+
+[tool.ruff.lint.isort]
+known-first-party = ["lightgbm"]
diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp
index ba8496ec4864..8227c7b65658 100644
--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -204,7 +204,7 @@ class LambdarankNDCG : public RankingObjective {
     }
     const double worst_score = score[sorted_idx[worst_idx]];
     double sum_lambdas = 0.0;
-    // start accmulate lambdas by pairs that contain at least one document above truncation level
+    // start accumulate lambdas by pairs that contain at least one document above truncation level
     for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
       if (score[sorted_idx[i]] == kMinScore) { continue; }
       for (data_size_t j = i + 1; j < cnt; ++j) {
diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu
index 59662fb19d55..9d8427a6f9a8 100644
--- a/src/treelearner/kernels/histogram_16_64_256.cu
+++ b/src/treelearner/kernels/histogram_16_64_256.cu
@@ -150,7 +150,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
   // size of threads that process this feature4
   const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);

-  // equavalent thread ID in this subgroup for this feature4
+  // equivalent thread ID in this subgroup for this feature4
   const unsigned int subglobal_tid = gtid - feature_id * subglobal_size;
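The `_compare_params_for_warning` rewrite in patch 24 above iterates with `dict.items()`, which avoids re-indexing the dictionary on every comparison. Its two-loop check is equivalent to comparing the two dicts with the ignored keys filtered out, as this small self-contained sketch shows; the function here is an illustrative re-implementation for clarity, not the library's code:

from typing import Any, Dict, Iterable


def compare_params_ignoring(
    params: Dict[str, Any],
    other_params: Dict[str, Any],
    ignore_keys: Iterable[str],
) -> bool:
    # equality over all keys except the ignored ones, in both directions
    ignored = set(ignore_keys)
    left = {k: v for k, v in params.items() if k not in ignored}
    right = {k: v for k, v in other_params.items() if k not in ignored}
    return left == right


assert compare_params_ignoring(
    {"num_leaves": 31, "seed": 1},
    {"num_leaves": 31, "seed": 2},
    ignore_keys={"seed"},
)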
From 31205fc8f816c677988f56f7699e78120a8f193c Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Sun, 15 Dec 2024 21:24:21 +0300
Subject: [PATCH 25/27] [ci] remove Docker volumes during Azure cleanup (#6760)

---
 .vsts-ci.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.vsts-ci.yml b/.vsts-ci.yml
index 3a111e10898e..40424840c82d 100644
--- a/.vsts-ci.yml
+++ b/.vsts-ci.yml
@@ -69,15 +69,17 @@ jobs:
       # check disk usage
       print-diagnostics
       # remove old containers, container images, volumes
-      # ref: https://stackoverflow.com/a/32723127/3986677)
+      # ref: https://stackoverflow.com/a/32723127/3986677
+      # ref: https://depot.dev/blog/docker-clear-cache#removing-everything-with-docker-system-prune
       echo "---- running 'docker system prune' ----"
       /tmp/docker system prune \
         --all \
         --force \
+        --volumes \
         --filter until=720h
       # check disk usage again
       print-diagnostics
-    displayName: clean
+    displayName: Clean
 ###########################################
 - job: Linux
 ###########################################

From 8eb3c3c625f6e5035a1da718d5fbd6c0bd0dcc9a Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sun, 15 Dec 2024 19:19:40 -0600
Subject: [PATCH 26/27] [ci] fix linkchecker job (#6757)

---
 .ci/test.sh         | 4 ++--
 docs/.linkcheckerrc | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.ci/test.sh b/.ci/test.sh
index cc8831f94c09..82c159064a33 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -146,8 +146,8 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
     make -C docs html || exit 1
     if [[ $TASK == "check-links" ]]; then
         # check docs for broken links
-        pip install linkchecker
-        linkchecker --config=.linkcheckerrc ./docs/_build/html/*.html || exit 1
+        pip install 'linkchecker>=10.5.0'
+        linkchecker --config=./docs/.linkcheckerrc ./docs/_build/html/*.html || exit 1
         exit 0
     fi
     # check the consistency of parameters' descriptions and other stuff
diff --git a/docs/.linkcheckerrc b/docs/.linkcheckerrc
index 003d8699a875..a4707aa536ea 100644
--- a/docs/.linkcheckerrc
+++ b/docs/.linkcheckerrc
@@ -1,9 +1,9 @@
 [checking]
-maxrequestspersecond=1
+maxrequestspersecond=0.1
 recursionlevel=1
 anchors=1
 sslverify=0
-threads=1
+threads=4

 [filtering]
 ignore=

From 480600b3afaf2a0a6f32cf417edf9567f625b2c3 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 16 Dec 2024 10:45:06 -0600
Subject: [PATCH 27/27] [python-package] simplify eval result printing (#6749)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: José Morales
---
 python-package/lightgbm/callback.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index ae1e72c549d4..c64fb8ba755b 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -73,15 +73,13 @@ class CallbackEnv:

 def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
     """Format metric string."""
-    if len(value) == 4:
-        return f"{value[0]}'s {value[1]}: {value[2]:g}"
-    elif len(value) == 5:
-        if show_stdv:
-            return f"{value[0]}'s {value[1]}: {value[2]:g} + {value[4]:g}"  # type: ignore[misc]
-        else:
-            return f"{value[0]}'s {value[1]}: {value[2]:g}"
-    else:
-        raise ValueError("Wrong metric value")
+    dataset_name, metric_name, metric_value, *_ = value
+    out = f"{dataset_name}'s {metric_name}: {metric_value:g}"
+    # tuples from cv() sometimes have a 5th item, with standard deviation of
+    # the evaluation metric (taken over all cross-validation folds)
+    if show_stdv and len(value) == 5:
+        out += f" + {value[4]:g}"
+    return out


 class _LogEvaluationCallback:
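The simplification in patch 27 leans on Python's starred unpacking, which accepts tuples of any length of at least three elements without branching on `len()`. A standalone sketch of the same pattern; the tuple values below are hypothetical examples, while the real tuples come from LightGBM's training and cv() loops:

from typing import Tuple, Union

# the two tuple shapes the formatter must accept:
# (dataset_name, metric_name, value, is_higher_better[, stdv])
EvalResult = Union[
    Tuple[str, str, float, bool],
    Tuple[str, str, float, bool, float],
]


def format_eval_result(value: EvalResult, show_stdv: bool) -> str:
    # starred unpacking tolerates both the 4-tuple and the 5-tuple
    dataset_name, metric_name, metric_value, *_ = value
    out = f"{dataset_name}'s {metric_name}: {metric_value:g}"
    if show_stdv and len(value) == 5:
        out += f" + {value[4]:g}"
    return out


print(format_eval_result(("valid_0", "l2", 0.25, True), show_stdv=True))
# valid_0's l2: 0.25
print(format_eval_result(("cv_agg", "l2", 0.25, True, 0.02), show_stdv=True))
# cv_agg's l2: 0.25 + 0.02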