From 784f38415d4dd08ccefe2a536d08971020672cca Mon Sep 17 00:00:00 2001
From: Oliver Borchert
Date: Sun, 1 Dec 2024 05:51:56 +0100
Subject: [PATCH 1/4] [ci] Introduce `typos` pre-commit hook (#6564)

Co-authored-by: Nikita Titov
---
 .ci/test-r-package-windows.ps1              |  2 +-
 .github/workflows/lock.yml                  |  2 +-
 .pre-commit-config.yaml                     |  8 ++++-
 .typos.toml                                 | 21 +++++++++++
 CMakeLists.txt                              |  2 +-
 R-package/R/lgb.Booster.R                   |  4 +--
 R-package/R/lgb.importance.R                |  2 +-
 R-package/R/lgb.model.dt.tree.R             |  2 +-
 R-package/R/lightgbm.R                      |  2 +-
 R-package/demo/cross_validation.R           |  2 +-
 R-package/demo/early_stopping.R             |  2 +-
 R-package/man/lgb.configure_fast_predict.Rd |  4 +--
 R-package/man/lgb.importance.Rd             |  2 +-
 R-package/man/lgb.model.dt.tree.Rd          |  2 +-
 R-package/man/lightgbm.Rd                   |  2 +-
 R-package/tests/testthat/test_basic.R       |  4 +--
 .../tests/testthat/test_custom_objective.R  |  2 +-
 .../tests/testthat/test_lgb.interprete.R    |  2 +-
 .../testthat/test_lgb.plot.interpretation.R |  4 +--
 cmake/Sanitizer.cmake                       |  2 +-
 docker/README.md                            |  4 +--
 docs/Parameters.rst                         |  2 +-
 docs/_static/js/script.js                   |  4 +--
 examples/lambdarank/train.conf              |  2 +-
 examples/regression/train.conf              | 10 +++---
 include/LightGBM/cuda/cuda_algorithms.hpp   |  6 ++--
 include/LightGBM/dataset.h                  |  2 +-
 include/LightGBM/utils/common.h             |  4 +--
 include/LightGBM/utils/random.h             |  4 +--
 python-package/lightgbm/basic.py            |  2 +-
 python-package/lightgbm/dask.py             |  2 +-
 src/boosting/bagging.hpp                    | 10 +++---
 src/boosting/gbdt_model_text.cpp            |  6 ++--
 src/io/metadata.cpp                         |  6 ++--
 src/network/linker_topo.cpp                 |  4 +--
 src/objective/rank_objective.hpp            |  2 +-
 .../cuda/cuda_best_split_finder.cpp         |  2 +-
 src/treelearner/cuda/cuda_data_partition.cu | 18 +++++-----
 src/treelearner/cuda/cuda_data_partition.hpp | 18 +++++-----
 .../cuda/cuda_histogram_constructor.cpp     |  2 +-
 .../cuda/cuda_histogram_constructor.hpp     |  2 +-
 src/treelearner/cuda/cuda_leaf_splits.cpp   |  8 ++---
 src/treelearner/cuda/cuda_leaf_splits.cu    | 16 ++++-----
 src/treelearner/cuda/cuda_leaf_splits.hpp   |  6 ++--
 .../data_parallel_tree_learner.cpp          |  6 ++--
 src/treelearner/feature_histogram.hpp       |  6 ++--
 src/treelearner/gpu_tree_learner.cpp        |  2 +-
 .../kernels/histogram_16_64_256.cu          |  4 +--
 src/treelearner/ocl/histogram16.cl          |  4 +--
 src/treelearner/ocl/histogram256.cl         |  2 +-
 src/treelearner/ocl/histogram64.cl          |  2 +-
 src/treelearner/parallel_tree_learner.h     |  8 ++---
 src/treelearner/serial_tree_learner.cpp     | 14 ++++----
 .../voting_parallel_tree_learner.cpp        | 12 +++---
 tests/cpp_tests/test_chunked_array.cpp      |  8 ++---
 tests/cpp_tests/test_stream.cpp             | 36 +++++++++----------
 tests/python_package_test/test_dask.py      |  2 +-
 tests/python_package_test/test_engine.py    |  2 +-
 58 files changed, 175 insertions(+), 148 deletions(-)
 create mode 100644 .typos.toml

diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1
index 1ce698a49c72..a3f524b60be7 100644
--- a/.ci/test-r-package-windows.ps1
+++ b/.ci/test-r-package-windows.ps1
@@ -171,7 +171,7 @@ Write-Output "Done installing Rtools"
 Write-Output "Installing CMake"
 Add-Type -AssemblyName System.IO.Compression.FileSystem
 [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Assert-Output $?
-# Remove old CMake shiped with RTools
+# Remove old CMake shipped with RTools
 Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore
 Write-Output "Done installing CMake"

diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml
index 4efe658b7f45..195fd5f1c8f1 100644
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -39,7 +39,7 @@ jobs:
             This pull request has been automatically locked since there has not been any recent activity since it was closed.
             To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues
             including a reference to this.
-          # what shoulld the locking status be?
+          # what should the locking status be?
           issue-lock-reason: 'resolved'
           pr-lock-reason: 'resolved'
           process-only: 'issues, prs'

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7e5e5dd8e9d9..b334db19b8e7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,4 +38,10 @@ repos:
   - repo: https://github.com/shellcheck-py/shellcheck-py
     rev: v0.10.0.1
     hooks:
-      - id: shellcheck
+      - id: shellcheck
+  - repo: https://github.com/crate-ci/typos
+    rev: v1.23.2
+    hooks:
+      - id: typos
+        args: ["--force-exclude"]
+        exclude: (\.gitignore$)|(^\.editorconfig$)

diff --git a/.typos.toml b/.typos.toml
new file mode 100644
index 000000000000..6dc2c2c97529
--- /dev/null
+++ b/.typos.toml
@@ -0,0 +1,21 @@
+default.extend-ignore-re = [
+    "/Ot",
+    "mis-alignment",
+    "mis-spelled",
+    "posix-seh-rt",
+]
+
+[default.extend-words]
+MAPE = "MAPE"
+datas = "datas"
+interprete = "interprete"
+mape = "mape"
+splitted = "splitted"
+
+[default.extend-identifiers]
+ERRORs = "ERRORs"
+GAM = "GAM"
+ND24s = "ND24s"
+WARNINGs = "WARNINGs"
+fullset = "fullset"
+thess = "thess"

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 183ef62bd68e..4f57cf9622e6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ option(USE_SWIG "Enable SWIG to generate Java API" OFF)
 option(USE_TIMETAG "Set to ON to output time costs" OFF)
 option(USE_CUDA "Enable CUDA-accelerated training " OFF)
 option(USE_DEBUG "Set to ON for Debug mode" OFF)
-option(USE_SANITIZER "Use santizer flags" OFF)
+option(USE_SANITIZER "Use sanitizer flags" OFF)
 set(
     ENABLED_SANITIZERS
     "address" "leak" "undefined"

diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index a13516ff6569..85a91b1ce058 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -1114,7 +1114,7 @@ predict.lgb.Booster <- function(object,
 #'
 #' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster}
 #' will cause it to ignore the fast-predict configuration and take the slow route instead
-#' (but be aware that an existing configuration might not always be overriden by supplying
+#' (but be aware that an existing configuration might not always be overridden by supplying
 #' different parameters or prediction type, so make sure to check that the output is what
 #' was expected when a prediction is to be made on a single row for something different than
 #' what is configured).
@@ -1128,7 +1128,7 @@ predict.lgb.Booster <- function(object,
 #' and as such, this function will produce an error if passing \code{csr=TRUE} and
 #' \code{type = "contrib"} together.
 #' @inheritParams lgb_predict_shared_params
-#' @param model LighGBM model object (class \code{lgb.Booster}).
+#' @param model LightGBM model object (class \code{lgb.Booster}).
 #'
 #' \bold{The object will be modified in-place}.
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs. diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 7c76131f4f53..d60507cf00d4 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -9,7 +9,7 @@ #' \item{\code{Feature}: Feature names in the model.} #' \item{\code{Gain}: The total gain of this feature's splits.} #' \item{\code{Cover}: The number of observation related to this feature.} -#' \item{\code{Frequency}: The number of times a feature splited in trees.} +#' \item{\code{Frequency}: The number of times a feature split in trees.} #' } #' #' @examples diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index db4ef955f866..ac1b2f9aaf14 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -10,7 +10,7 @@ #' \emph{New in version 4.4.0} #' #' @return -#' A \code{data.table} with detailed information about model trees' nodes and leafs. +#' A \code{data.table} with detailed information about model trees' nodes and leaves. #' #' The columns of the \code{data.table} are: #' diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index efa593ffe12f..6cb4eebd8baf 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -139,7 +139,7 @@ NULL #' system, but be aware that getting the number of cores detected correctly requires package #' \code{RhpcBLASctl} to be installed. #' -#' This parameter gets overriden by \code{num_threads} and its aliases under \code{params} +#' This parameter gets overridden by \code{num_threads} and its aliases under \code{params} #' if passed there. #' #' \emph{New in version 4.0.0} diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index 0324f83f2da9..9f74ef7f4b2a 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -51,7 +51,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 6ca214c5ac7b..4435dd1b09b6 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -29,7 +29,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # For example, we are doing logistic loss, the prediction is score before logistic transformation # The built-in evaluation error assumes input is after logistic transformation # Keep this in mind when you use the customization, and maybe you need write customized evaluation function diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index e02600451df5..9cd4339bdced 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ 
b/R-package/man/lgb.configure_fast_predict.Rd @@ -14,7 +14,7 @@ lgb.configure_fast_predict( ) } \arguments{ -\item{model}{LighGBM model object (class \code{lgb.Booster}). +\item{model}{LightGBM model object (class \code{lgb.Booster}). \bold{The object will be modified in-place}.} @@ -98,7 +98,7 @@ Calling this function multiple times with different parameters might not overrid Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} will cause it to ignore the fast-predict configuration and take the slow route instead - (but be aware that an existing configuration might not always be overriden by supplying + (but be aware that an existing configuration might not always be overridden by supplying different parameters or prediction type, so make sure to check that the output is what was expected when a prediction is to be made on a single row for something different than what is configured). diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 79cb82f5d8ef..5099643112be 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -17,7 +17,7 @@ For a tree model, a \code{data.table} with the following columns: \item{\code{Feature}: Feature names in the model.} \item{\code{Gain}: The total gain of this feature's splits.} \item{\code{Cover}: The number of observation related to this feature.} - \item{\code{Frequency}: The number of times a feature splited in trees.} + \item{\code{Frequency}: The number of times a feature split in trees.} } } \description{ diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index ecfee17332f5..df36b6a94f42 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -18,7 +18,7 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L) \emph{New in version 4.4.0}} } \value{ -A \code{data.table} with detailed information about model trees' nodes and leafs. +A \code{data.table} with detailed information about model trees' nodes and leaves. The columns of the \code{data.table} are: diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 90cb3166bf5c..376a6d03a6b1 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -93,7 +93,7 @@ set to the iteration number of the best iteration.} system, but be aware that getting the number of cores detected correctly requires package \code{RhpcBLASctl} to be installed. - This parameter gets overriden by \code{num_threads} and its aliases under \code{params} + This parameter gets overridden by \code{num_threads} and its aliases under \code{params} if passed there. \emph{New in version 4.0.0}} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index c734816b4038..7310815c4a6d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -9,7 +9,7 @@ set.seed(708L) # to an accumulator then returns the current value. 
# This is used to mock the situation where an evaluation # metric increases every iteration -ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACCUMULATOR" assign(x = ACCUMULATOR_NAME, value = 0.0, envir = .GlobalEnv) .increasing_metric <- function(preds, dtrain) { @@ -1777,7 +1777,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th , early_stopping_rounds + 1L ) - # Booster should understand thatt all three of these metrics should be minimized + # Booster should understand that all three of these metrics should be minimized eval_info <- bst$.__enclos_env__$private$get_eval_info() expect_identical(eval_info, c("mape", "rmse", "l1")) expect_identical( diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 2c10b9d571dc..a1baf0067c4a 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -14,7 +14,7 @@ logregobj <- function(preds, dtrain) { # User-defined evaluation function returns a pair (metric_name, result, higher_better) # NOTE: when you do customized loss function, the default prediction value is margin -# This may make built-in evalution metric calculate wrong results +# This may make built-in evaluation metric calculate wrong results # Keep this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- get_field(dtrain, "label") diff --git a/R-package/tests/testthat/test_lgb.interprete.R b/R-package/tests/testthat/test_lgb.interprete.R index 322a80a55bc5..cfcd1c942f31 100644 --- a/R-package/tests/testthat/test_lgb.interprete.R +++ b/R-package/tests/testthat/test_lgb.interprete.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.intereprete works as expected for binary classification", { +test_that("lgb.interprete works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/tests/testthat/test_lgb.plot.interpretation.R b/R-package/tests/testthat/test_lgb.plot.interpretation.R index 6cba9927942a..e8a021fc7237 100644 --- a/R-package/tests/testthat/test_lgb.plot.interpretation.R +++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R @@ -5,7 +5,7 @@ log(x / (1.0 - x)) } -test_that("lgb.plot.interepretation works as expected for binary classification", { +test_that("lgb.plot.interpretation works as expected for binary classification", { data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) @@ -57,7 +57,7 @@ test_that("lgb.plot.interepretation works as expected for binary classification" expect_null(plot_res) }) -test_that("lgb.plot.interepretation works as expected for multiclass classification", { +test_that("lgb.plot.interpretation works as expected for multiclass classification", { data(iris) # We must convert factors to numeric diff --git a/cmake/Sanitizer.cmake b/cmake/Sanitizer.cmake index a3768effac0d..f99048476d8b 100644 --- a/cmake/Sanitizer.cmake +++ b/cmake/Sanitizer.cmake @@ -18,7 +18,7 @@ macro(enable_sanitizer sanitizer) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") else() - message(FATAL_ERROR "Santizer ${sanitizer} not supported.") + message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") endif() endmacro() diff --git 
a/docker/README.md b/docker/README.md index dfedc2f4e3f1..e68346545ccf 100644 --- a/docker/README.md +++ b/docker/README.md @@ -55,7 +55,7 @@ After this runs, a LightGBM model can be found at `LightGBM-CLI-model.txt`. For more details on how to configure and use the LightGBM CLI, see https://lightgbm.readthedocs.io/en/latest/Quick-Start.html. -## Running the Python-package Сontainer +## Running the Python-package Container Build an image with the LightGBM Python-package installed. @@ -114,7 +114,7 @@ docker run \ python ``` -## Running the R-package Сontainer +## Running the R-package Container Build an image with the LightGBM R-package installed. diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 1f80a13d5731..b44d90ecec10 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -35,7 +35,7 @@ For example, in Python: .. code-block:: python - # use learning rate of 0.07, becase 'learning_rate' + # use learning rate of 0.07, because 'learning_rate' # is the primary parameter name lgb.train( params={ diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 3f129501e06f..c4717b8a0ee5 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -17,7 +17,7 @@ $(() => { $( '', ).appendTo("body"); - const collapsable = [ + const collapsible = [ "#build-threadless-version-not-recommended", "#build-mpi-version", "#build-gpu-version", @@ -25,7 +25,7 @@ $(() => { "#build-java-wrapper", "#build-c-unit-tests", ]; - $.each(collapsable, (_, val) => { + $.each(collapsible, (_, val) => { const header = `${val} > :header:first`; const content = `${val} :not(:header:first)`; $(header).addClass("closed"); diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index 2aa2113b40d4..f007dcd6fe66 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -64,7 +64,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. # num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration diff --git a/examples/regression/train.conf b/examples/regression/train.conf index cd910af61dcf..992bc6c9ab53 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -20,7 +20,7 @@ objective = regression # binary_error metric = l2 -# frequence for metric output +# frequency for metric output metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric @@ -36,12 +36,12 @@ max_bin = 255 # forcedbins_filename = forced_bins.json # training data -# if exsting weight file, should name to "regression.train.weight" +# if existing weight file, should name to "regression.train.weight" # alias: train_data, train data = regression.train # validation data, support multi validation data, separated by ',' -# if exsting weight file, should name to "regression.test.weight" +# if existing weight file, should name to "regression.test.weight" # alias: valid, test, test_data, valid_data = regression.test @@ -62,7 +62,7 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu. 
# num_threads = 8 # feature sub-sample, will random select 80% feature to train on each iteration @@ -72,7 +72,7 @@ feature_fraction = 0.9 # Support bagging (data sub-sample), will perform bagging every 5 iterations bagging_freq = 5 -# Bagging farction, will random select 80% data on bagging +# Bagging fraction, will random select 80% data on bagging # alias: sub_row bagging_fraction = 0.8 diff --git a/include/LightGBM/cuda/cuda_algorithms.hpp b/include/LightGBM/cuda/cuda_algorithms.hpp index f79fc57e4f42..abda07b1582f 100644 --- a/include/LightGBM/cuda/cuda_algorithms.hpp +++ b/include/LightGBM/cuda/cuda_algorithms.hpp @@ -115,7 +115,7 @@ __device__ __forceinline__ T ShuffleReduceSumWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceSum(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -145,7 +145,7 @@ __device__ __forceinline__ T ShuffleReduceMaxWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceMax(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; @@ -196,7 +196,7 @@ __device__ __forceinline__ T ShuffleReduceMinWarp(T value, const data_size_t len return value; } -// reduce values from an 1-dimensional block (block size must be no greather than 1024) +// reduce values from an 1-dimensional block (block size must be no greater than 1024) template __device__ __forceinline__ T ShuffleReduceMin(T value, T* shared_mem_buffer, const size_t len) { const uint32_t warpLane = threadIdx.x % warpSize; diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 220a1f9f009c..ef214b7cd89d 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -376,7 +376,7 @@ class Metadata { std::vector query_boundaries_; /*! \brief Query weights */ std::vector query_weights_; - /*! \brief Number of querys */ + /*! \brief Number of queries */ data_size_t num_queries_; /*! \brief Number of Initial score, used to check correct weight file */ int64_t num_init_score_; diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 6c3ebf5d0096..67bc07b0ecd5 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -925,11 +925,11 @@ class AlignmentAllocator { inline ~AlignmentAllocator() throw() {} - inline pointer adress(reference r) { + inline pointer address(reference r) { return &r; } - inline const_pointer adress(const_reference r) const { + inline const_pointer address(const_reference r) const { return &r; } diff --git a/include/LightGBM/utils/random.h b/include/LightGBM/utils/random.h index 6f89f935b310..eb115ea96644 100644 --- a/include/LightGBM/utils/random.h +++ b/include/LightGBM/utils/random.h @@ -22,9 +22,9 @@ class Random { */ Random() { std::random_device rd; - auto genrator = std::mt19937(rd()); + auto generator = std::mt19937(rd()); std::uniform_int_distribution distribution(0, x); - x = distribution(genrator); + x = distribution(generator); } /*! 
* \brief Constructor, with specific seed diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index cf3723aadc63..99a690f38993 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -3525,7 +3525,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset": _log_warning(err_msg) self.feature_name = self.get_feature_name() _log_warning( - "Reseting categorical features.\n" + "Resetting categorical features.\n" "You can set new categorical features via ``set_categorical_feature`` method" ) self.categorical_feature = "auto" diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index e15979bc40db..dcdacba7366c 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -967,7 +967,7 @@ def _extract(items: List[Any], i: int) -> Any: out[i].append(part) # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix - # the code below is used instead to ensure that the sparse type is preserved during concatentation + # the code below is used instead to ensure that the sparse type is preserved during concatenation if isinstance(pred_meta, ss.csr_matrix): concat_fn = partial(ss.vstack, format="csr") elif isinstance(pred_meta, ss.csc_matrix): diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp index 7a66b5696425..451384e6850a 100644 --- a/src/boosting/bagging.hpp +++ b/src/boosting/bagging.hpp @@ -73,17 +73,17 @@ class BaggingSampleStrategy : public SampleStrategy { for (data_size_t i = start_index + 1; i < end_index; ++i) { sampled_query_boundaries_[i] += sampled_query_boundaries_[i - 1]; } - sampled_query_boundaires_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1]; }); for (int thread_index = 1; thread_index < num_blocks; ++thread_index) { - sampled_query_boundaires_thread_buffer_[thread_index] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_thread_buffer_[thread_index] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } Threading::For(0, num_sampled_queries_ + 1, 128, [this](int thread_index, data_size_t start_index, data_size_t end_index) { if (thread_index > 0) { for (data_size_t i = start_index; i < end_index; ++i) { - sampled_query_boundaries_[i] += sampled_query_boundaires_thread_buffer_[thread_index - 1]; + sampled_query_boundaries_[i] += sampled_query_boundaries_thread_buffer_[thread_index - 1]; } } }); @@ -171,7 +171,7 @@ class BaggingSampleStrategy : public SampleStrategy { } else { bagging_runner_.ReSize(num_queries_); sampled_query_boundaries_.resize(num_queries_ + 1, 0); - sampled_query_boundaires_thread_buffer_.resize(num_threads_, 0); + sampled_query_boundaries_thread_buffer_.resize(num_threads_, 0); bag_query_indices_.resize(num_data_); } bagging_rands_.clear(); @@ -280,7 +280,7 @@ class BaggingSampleStrategy : public SampleStrategy { /*! \brief query boundaries of the in-bag queries */ std::vector sampled_query_boundaries_; /*! \brief buffer for calculating sampled_query_boundaries_ */ - std::vector sampled_query_boundaires_thread_buffer_; + std::vector sampled_query_boundaries_thread_buffer_; /*! \brief in-bag query indices */ std::vector> bag_query_indices_; /*! 
\brief number of queries in the training dataset */ diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 27be5afe066e..e8b6dd2332ef 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -545,17 +545,17 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { } } else { std::vector tree_sizes = CommonC::StringToArray(key_vals["tree_sizes"].c_str(), ' '); - std::vector tree_boundries(tree_sizes.size() + 1, 0); + std::vector tree_boundaries(tree_sizes.size() + 1, 0); int num_trees = static_cast(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { - tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i]; + tree_boundaries[i + 1] = tree_boundaries[i] + tree_sizes[i]; models_.emplace_back(nullptr); } OMP_INIT_EX(); #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_trees; ++i) { OMP_LOOP_EX_BEGIN(); - auto cur_p = p + tree_boundries[i]; + auto cur_p = p + tree_boundaries[i]; auto line_len = Common::GetLine(cur_p); std::string cur_line(cur_p, line_len); if (Common::StartsWith(cur_line, "Tree=")) { diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index f46e6d1c9f14..f6f07c434661 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -225,7 +225,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector(metadata.num_position_ids()); - // get boundries + // get boundaries query_boundaries_ = metadata.query_boundaries(); if (query_boundaries_ == nullptr) { Log::Fatal("Ranking tasks require query information"); diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp index 95758542849c..e272ce744b1a 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.cpp +++ b/src/treelearner/cuda/cuda_best_split_finder.cpp @@ -120,7 +120,7 @@ void CUDABestSplitFinder::Init() { void CUDABestSplitFinder::InitCUDAFeatureMetaInfo() { AllocateCUDAMemory(&cuda_is_feature_used_bytree_, static_cast(num_features_), __FILE__, __LINE__); - // intialize split find task information (a split find task is one pass through the histogram of a feature) + // initialize split find task information (a split find task is one pass through the histogram of a feature) num_tasks_ = 0; for (int inner_feature_index = 0; inner_feature_index < num_features_; ++inner_feature_index) { const uint32_t num_bin = feature_num_bins_[inner_feature_index]; diff --git a/src/treelearner/cuda/cuda_data_partition.cu b/src/treelearner/cuda/cuda_data_partition.cu index 3090b7a84176..4ca9d9279443 100644 --- a/src/treelearner/cuda/cuda_data_partition.cu +++ b/src/treelearner/cuda/cuda_data_partition.cu @@ -262,7 +262,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( } } -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -286,7 +286,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4( template __global__ void GenDataToLeftBitVectorKernel( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, uint16_t* block_to_left_offset, data_size_t* block_to_left_offset_buffer, data_size_t* block_to_right_offset_buffer) { @@ -335,7 +335,7 @@ __global__ void GenDataToLeftBitVectorKernel( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + 
GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -363,7 +363,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -380,7 +380,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -396,7 +396,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const bool max_bin_to_left, const bool is_single_feature_in_column) { @@ -413,7 +413,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column) { if (!max_bin_to_left) { @@ -429,7 +429,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3( template void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column) { if (!is_single_feature_in_column) { GenDataToLeftBitVectorKernel @@ -548,7 +548,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernel( #undef UpdateDataIndexToLeafIndexKernel_PARAMS #undef UpdateDataIndexToLeafIndex_ARGS -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #undef GenBitVector_ARGS template diff --git a/src/treelearner/cuda/cuda_data_partition.hpp b/src/treelearner/cuda/cuda_data_partition.hpp index f6bbab9b8c65..bfcce89af243 100644 --- a/src/treelearner/cuda/cuda_data_partition.hpp +++ b/src/treelearner/cuda/cuda_data_partition.hpp @@ -174,7 +174,7 @@ class CUDADataPartition { const int left_leaf_index, const int right_leaf_index); -#define GenDataToLeftBitVectorKernel_PARMS \ +#define GenDataToLeftBitVectorKernel_PARAMS \ const BIN_TYPE* column_data, \ const data_size_t num_data_in_leaf, \ const data_size_t* data_indices_in_leaf, \ @@ -187,7 +187,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_zero, const bool missing_is_na, const bool mfb_is_zero, @@ -197,7 +197,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner0( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool missing_is_na, const bool mfb_is_zero, const bool mfb_is_na, @@ -206,7 +206,7 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner1( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_zero, const bool mfb_is_na, const bool max_bin_to_left, @@ -214,23 +214,23 @@ class CUDADataPartition { template void LaunchGenDataToLeftBitVectorKernelInner2( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool mfb_is_na, const 
bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner3( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool max_bin_to_left, const bool is_single_feature_in_column); template void LaunchGenDataToLeftBitVectorKernelInner4( - GenDataToLeftBitVectorKernel_PARMS, + GenDataToLeftBitVectorKernel_PARAMS, const bool is_single_feature_in_column); -#undef GenDataToLeftBitVectorKernel_PARMS +#undef GenDataToLeftBitVectorKernel_PARAMS #define UpdateDataIndexToLeafIndexKernel_PARAMS \ const BIN_TYPE* column_data, \ @@ -379,7 +379,7 @@ class CUDADataPartition { int* cuda_split_info_buffer_; // dataset information - /*! \brief number of data in training set, for intialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ + /*! \brief number of data in training set, for initialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */ data_size_t* cuda_num_data_; diff --git a/src/treelearner/cuda/cuda_histogram_constructor.cpp b/src/treelearner/cuda/cuda_histogram_constructor.cpp index 659db2aad24c..9f42eadec6f7 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.cpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.cpp @@ -150,7 +150,7 @@ void CUDAHistogramConstructor::CalcConstructHistogramKernelDim( int* block_dim_y, const data_size_t num_data_in_smaller_leaf) { *block_dim_x = cuda_row_data_->max_num_column_per_partition(); - *block_dim_y = NUM_THRADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); + *block_dim_y = NUM_THREADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition(); *grid_dim_x = cuda_row_data_->num_feature_partitions(); *grid_dim_y = std::max(min_grid_dim_y_, ((num_data_in_smaller_leaf + NUM_DATA_PER_THREAD - 1) / NUM_DATA_PER_THREAD + (*block_dim_y) - 1) / (*block_dim_y)); diff --git a/src/treelearner/cuda/cuda_histogram_constructor.hpp b/src/treelearner/cuda/cuda_histogram_constructor.hpp index ddc78cb17d90..655029d23ba5 100644 --- a/src/treelearner/cuda/cuda_histogram_constructor.hpp +++ b/src/treelearner/cuda/cuda_histogram_constructor.hpp @@ -19,7 +19,7 @@ #include "cuda_leaf_splits.hpp" #define NUM_DATA_PER_THREAD (400) -#define NUM_THRADS_PER_BLOCK (504) +#define NUM_THREADS_PER_BLOCK (504) #define NUM_FEATURE_PER_THREAD_GROUP (28) #define SUBTRACT_BLOCK_SIZE (1024) #define FIX_HISTOGRAM_SHARED_MEM_SIZE (1024) diff --git a/src/treelearner/cuda/cuda_leaf_splits.cpp b/src/treelearner/cuda/cuda_leaf_splits.cpp index 803d4674ee48..2bdd0d47fae1 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cpp +++ b/src/treelearner/cuda/cuda_leaf_splits.cpp @@ -16,7 +16,7 @@ num_data_(num_data) {} CUDALeafSplits::~CUDALeafSplits() {} void CUDALeafSplits::Init(const bool use_quantized_grad) { - num_blocks_init_from_gradients_ = (num_data_ + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data_ + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; // allocate more memory for sum reduction in CUDA // only the first element records the final sum @@ -44,7 +44,7 @@ void CUDALeafSplits::InitValues( cuda_hessians_ = cuda_hessians; cuda_sum_of_gradients_buffer_.SetValue(0); cuda_sum_of_hessians_buffer_.SetValue(0); - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, 
cuda_hist_in_leaf); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -59,7 +59,7 @@ void CUDALeafSplits::InitValues( const score_t* grad_scale, const score_t* hess_scale) { cuda_gradients_ = reinterpret_cast(cuda_gradients_and_hessians); cuda_hessians_ = nullptr; - LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); + LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale); CopyFromCUDADeviceToHost(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__); CopyFromCUDADeviceToHost(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__); SynchronizeCUDADevice(__FILE__, __LINE__); @@ -67,7 +67,7 @@ void CUDALeafSplits::InitValues( void CUDALeafSplits::Resize(const data_size_t num_data) { num_data_ = num_data; - num_blocks_init_from_gradients_ = (num_data + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS; + num_blocks_init_from_gradients_ = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS; cuda_sum_of_gradients_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); cuda_sum_of_gradients_hessians_buffer_.Resize(static_cast(num_blocks_init_from_gradients_)); diff --git a/src/treelearner/cuda/cuda_leaf_splits.cu b/src/treelearner/cuda/cuda_leaf_splits.cu index ae505ecd55dd..0c796be9f20a 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.cu +++ b/src/treelearner/cuda/cuda_leaf_splits.cu @@ -180,23 +180,23 @@ void CUDALeafSplits::LaunchInitValuesEmptyKernel() { InitValuesEmptyKernel<<<1, 1>>>(cuda_struct_.RawData()); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } else { - CUDAInitValuesKernel1<<>>( + CUDAInitValuesKernel1<<>>( cuda_gradients_, cuda_hessians_, num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData()); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel2<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel2<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), @@ -208,7 +208,7 @@ void CUDALeafSplits::LaunchInitValuesKernal( SynchronizeCUDADevice(__FILE__, __LINE__); } -void CUDALeafSplits::LaunchInitValuesKernal( +void CUDALeafSplits::LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, @@ -217,17 +217,17 @@ void CUDALeafSplits::LaunchInitValuesKernal( const score_t* grad_scale, const 
score_t* hess_scale) { if (cuda_bagging_data_indices == nullptr) { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } else { - CUDAInitValuesKernel3<<>>( + CUDAInitValuesKernel3<<>>( reinterpret_cast(cuda_gradients_), num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(), cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale); } SynchronizeCUDADevice(__FILE__, __LINE__); - CUDAInitValuesKernel4<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>( + CUDAInitValuesKernel4<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>( lambda_l1, lambda_l2, num_blocks_init_from_gradients_, cuda_sum_of_gradients_buffer_.RawData(), diff --git a/src/treelearner/cuda/cuda_leaf_splits.hpp b/src/treelearner/cuda/cuda_leaf_splits.hpp index c2635346098b..43a0492452bd 100644 --- a/src/treelearner/cuda/cuda_leaf_splits.hpp +++ b/src/treelearner/cuda/cuda_leaf_splits.hpp @@ -13,7 +13,7 @@ #include #include -#define NUM_THRADS_PER_BLOCK_LEAF_SPLITS (1024) +#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024) #define NUM_DATA_THREAD_ADD_LEAF_SPLITS (6) namespace LightGBM { @@ -142,14 +142,14 @@ class CUDALeafSplits { private: void LaunchInitValuesEmptyKernel(); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf); - void LaunchInitValuesKernal( + void LaunchInitValuesKernel( const double lambda_l1, const double lambda_l2, const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf, diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 64c342e5b01d..670788118455 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -260,12 +260,12 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { if (smaller_leaf_num_bits <= 16) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_int16_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt16(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histogram()); } else { if (local_smaller_leaf_num_bits == 32) { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawDataInt32(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histogram()); } else { this->smaller_leaf_histogram_array_[feature_index].CopyFromInt16ToInt32( input_buffer_.data() + buffer_write_start_pos_[feature_index]); @@ -274,7 +274,7 @@ void DataParallelTreeLearner::FindBestSplits(const Tree* tree) { } else { std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index], this->smaller_leaf_histogram_array_[feature_index].RawData(), - this->smaller_leaf_histogram_array_[feature_index].SizeOfHistgram()); + this->smaller_leaf_histogram_array_[feature_index].SizeOfHistogram()); } } global_timer.Stop("DataParallelTreeLearner::ReduceHistogram::Copy"); diff --git 
a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 70dd0fb5436f..2d4abbd27af1 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -668,15 +668,15 @@ class FeatureHistogram { /*! * \brief Binary size of this histogram */ - int SizeOfHistgram() const { + int SizeOfHistogram() const { return (meta_->num_bin - meta_->offset) * kHistEntrySize; } - int SizeOfInt32Histgram() const { + int SizeOfInt32Histogram() const { return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize; } - int SizeOfInt16Histgram() const { + int SizeOfInt16Histogram() const { return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize; } diff --git a/src/treelearner/gpu_tree_learner.cpp b/src/treelearner/gpu_tree_learner.cpp index 7c6c811c3b45..1bf21d65ccc6 100644 --- a/src/treelearner/gpu_tree_learner.cpp +++ b/src/treelearner/gpu_tree_learner.cpp @@ -777,7 +777,7 @@ void GPUTreeLearner::ResetIsConstantHessian(bool is_constant_hessian) { void GPUTreeLearner::BeforeTrain() { #if GPU_DEBUG >= 2 - printf("Copying intial full gradients and hessians to device\n"); + printf("Copying initial full gradients and hessians to device\n"); #endif // Copy initial full hessians and gradients to GPU. // We start copying as early as possible, instead of at ConstructHistogram(). diff --git a/src/treelearner/kernels/histogram_16_64_256.cu b/src/treelearner/kernels/histogram_16_64_256.cu index d778d650f722..59662fb19d55 100644 --- a/src/treelearner/kernels/histogram_16_64_256.cu +++ b/src/treelearner/kernels/histogram_16_64_256.cu @@ -508,7 +508,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4 for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) { // prefetch the next iteration variables - // we don't need bondary check because we have made the buffer large + // we don't need boundary check because we have made the buffer large int i_next = i + subglobal_size; #ifdef IGNORE_INDICES // we need to check to bounds here @@ -752,7 +752,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base, // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary?? // total size: 2 * 256 * size_of(float) = 2 KB // organization: each feature/grad/hessian is at a different bank, - // as indepedent of the feature value as possible + // as independent of the feature value as possible acc_type *gh_hist = reinterpret_cast(shared_array); // counter histogram diff --git a/src/treelearner/ocl/histogram16.cl b/src/treelearner/ocl/histogram16.cl index 21624ec9ee10..be590c20666b 100644 --- a/src/treelearner/ocl/histogram16.cl +++ b/src/treelearner/ocl/histogram16.cl @@ -8,7 +8,7 @@ #ifndef __OPENCL_VERSION__ // If we are including this file in C++, // the entire source file following (except the last #endif) will become -// a raw string literal. The extra ")" is just for mathcing parentheses +// a raw string literal. The extra ")" is just for matching parentheses // to make the editor happy. The extra ")" and extra endif will be skipped. // DO NOT add anything between here and the next #ifdef, otherwise you need // to modify the skip count at the end of this file. 
@@ -475,7 +475,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/ocl/histogram256.cl b/src/treelearner/ocl/histogram256.cl index 3351f9efa7c3..b5c049e1272d 100644 --- a/src/treelearner/ocl/histogram256.cl +++ b/src/treelearner/ocl/histogram256.cl @@ -387,7 +387,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base, const uint subglobal_tid = gtid - group_feature * subglobal_size; // extract feature mask, when a byte is set to 0, that feature is disabled #if ENABLE_ALL_FEATURES == 1 - // hopefully the compiler will propogate the constants and eliminate all branches + // hopefully the compiler will propagate the constants and eliminate all branches uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff); #else uchar4 feature_mask = feature_masks[group_feature]; diff --git a/src/treelearner/ocl/histogram64.cl b/src/treelearner/ocl/histogram64.cl index 48fa8c506d8b..4ec4d6371df5 100644 --- a/src/treelearner/ocl/histogram64.cl +++ b/src/treelearner/ocl/histogram64.cl @@ -454,7 +454,7 @@ R""() // prefetch the next iteration variables - // we don't need bondary check because if it is out of boundary, ind_next = 0 + // we don't need boundary check because if it is out of boundary, ind_next = 0 #ifndef IGNORE_INDICES feature4_next = feature_data[ind_next]; #endif diff --git a/src/treelearner/parallel_tree_learner.h b/src/treelearner/parallel_tree_learner.h index b942dceab28b..aff8ac0fd4c5 100644 --- a/src/treelearner/parallel_tree_learner.h +++ b/src/treelearner/parallel_tree_learner.h @@ -148,12 +148,12 @@ class VotingParallelTreeLearner: public TREELEARNER_T { * \brief Perform global voting * \param leaf_idx index of leaf * \param splits All splits from local voting - * \param out Result of gobal voting, only store feature indices + * \param out Result of global voting, only store feature indices */ void GlobalVoting(int leaf_idx, const std::vector& splits, std::vector* out); /*! - * \brief Copy local histgram to buffer + * \brief Copy local histogram to buffer * \param smaller_top_features Selected features for smaller leaf * \param larger_top_features Selected features for larger leaf */ @@ -183,9 +183,9 @@ class VotingParallelTreeLearner: public TREELEARNER_T { std::vector block_start_; /*! \brief Block size for reduce scatter */ std::vector block_len_; - /*! \brief Read positions for feature histgrams at smaller leaf */ + /*! \brief Read positions for feature histograms at smaller leaf */ std::vector smaller_buffer_read_start_pos_; - /*! \brief Read positions for feature histgrams at larger leaf */ + /*! \brief Read positions for feature histograms at larger leaf */ std::vector larger_buffer_read_start_pos_; /*! 
\brief Size for reduce scatter */ comm_size_t reduce_scatter_size_; diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 14ede072dc9e..01cdd7623c02 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -735,24 +735,24 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf, std::set SerialTreeLearner::FindAllForceFeatures(Json force_split_leaf_setting) { std::set force_features; - std::queue force_split_leafs; + std::queue force_split_leaves; - force_split_leafs.push(force_split_leaf_setting); + force_split_leaves.push(force_split_leaf_setting); - while (!force_split_leafs.empty()) { - Json split_leaf = force_split_leafs.front(); - force_split_leafs.pop(); + while (!force_split_leaves.empty()) { + Json split_leaf = force_split_leaves.front(); + force_split_leaves.pop(); const int feature_index = split_leaf["feature"].int_value(); const int feature_inner_index = train_data_->InnerFeatureIndex(feature_index); force_features.insert(feature_inner_index); if (split_leaf.object_items().count("left") > 0) { - force_split_leafs.push(split_leaf["left"]); + force_split_leaves.push(split_leaf["left"]); } if (split_leaf.object_items().count("right") > 0) { - force_split_leafs.push(split_leaf["right"]); + force_split_leaves.push(split_leaf["right"]); } } diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index b88db5a7ba28..37f2d4cf2641 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -207,9 +207,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec smaller_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++smaller_idx; } if (cur_used_features >= cur_total_feature) { @@ -225,9 +225,9 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vec larger_buffer_read_start_pos_[inner_feature_index] = static_cast(cur_size); } // copy - std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram()); - cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); - reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram(); + std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram()); + cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); + reduce_scatter_size_ += 
this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram(); ++larger_idx; } } diff --git a/tests/cpp_tests/test_chunked_array.cpp b/tests/cpp_tests/test_chunked_array.cpp index 9bfd857299ab..bc58918082a8 100644 --- a/tests/cpp_tests/test_chunked_array.cpp +++ b/tests/cpp_tests/test_chunked_array.cpp @@ -217,8 +217,8 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { // Number of trials for each new ChunkedArray configuration. Pass 100 times over the search space: const size_t N_TRIALS = MAX_CHUNKS_SEARCH * MAX_IN_CHUNK_SEARCH_IDX * 100; const int INVALID = -1; // A negative value signaling the requested value lives in an invalid address. - const int UNITIALIZED = -99; // A negative value to signal this was never updated. - std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); // Memorize latest inserted values. + const int UNINITIALIZED = -99; // A negative value to signal this was never updated. + std::vector ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); // Memorize latest inserted values. // Each outer loop iteration changes the test by adding +1 chunk. We start with 1 chunk only: for (size_t chunks = 1; chunks < MAX_CHUNKS_SEARCH; ++chunks) { @@ -249,10 +249,10 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) { } // Final check: ensure even with overrides, all valid insertions store the latest value at that address: - std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED); + std::vector coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED); ca_.coalesce_to(coalesced_out.data(), true); // Export all valid addresses. for (size_t i = 0; i < ref_values.size(); ++i) { - if (ref_values[i] != UNITIALIZED) { + if (ref_values[i] != UNINITIALIZED) { // Test in 2 ways that the values are correctly laid out in memory: EXPECT_EQ(ca_.getitem(i / CHUNK_SIZE, i % CHUNK_SIZE, INVALID), ref_values[i]); EXPECT_EQ(coalesced_out[i], ref_values[i]); diff --git a/tests/cpp_tests/test_stream.cpp b/tests/cpp_tests/test_stream.cpp index bc5f73b0a3ee..a656af1e2fe9 100644 --- a/tests/cpp_tests/test_stream.cpp +++ b/tests/cpp_tests/test_stream.cpp @@ -17,7 +17,7 @@ using LightGBM::TestUtils; void test_stream_dense( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -86,7 +86,7 @@ void test_stream_dense( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows dense data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -131,7 +131,7 @@ void test_stream_dense( void test_stream_sparse( int8_t creation_type, - DatasetHandle ref_datset_handle, + DatasetHandle ref_dataset_handle, int32_t nrows, int32_t ncols, int32_t nclasses, @@ -203,7 +203,7 @@ void test_stream_sparse( case 1: Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows sparse data with a batch size of %d", nrows, batch_count); - result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle); + result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result; break; } @@ -249,13 +249,13 @@ void test_stream_sparse( TEST(Stream, 
PushDenseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -266,9 +266,9 @@ TEST(Stream, PushDenseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -290,23 +290,23 @@ TEST(Stream, PushDenseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_dense(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); + test_stream_dense(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } TEST(Stream, PushSparseRowsWithMetadata) { // Load some test data - DatasetHandle ref_datset_handle; + DatasetHandle ref_dataset_handle; const char* params = "max_bin=15"; // Use the smaller ".test" data because we don't care about the actual data and it's smaller - int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle); + int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle); EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result; - Dataset* ref_dataset = static_cast(ref_datset_handle); + Dataset* ref_dataset = static_cast(ref_dataset_handle); auto noriginalrows = ref_dataset->num_data(); Log::Info("Row count: %d", noriginalrows); Log::Info("Feature group count: %d", ref_dataset->num_features()); @@ -317,9 +317,9 @@ TEST(Stream, PushSparseRowsWithMetadata) { unused_init_scores.resize(noriginalrows * nclasses); std::vector unused_groups; unused_groups.assign(noriginalrows, 1); - result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1); + result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", 
unused_init_scores.data(), noriginalrows * nclasses, 1); EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result; - result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2); + result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2); EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result; // Now use the reference dataset schema to make some testable Datasets with N rows each @@ -344,10 +344,10 @@ TEST(Stream, PushSparseRowsWithMetadata) { for (size_t j = 0; j < batch_counts.size(); ++j) { auto type = creation_types[i]; auto batch_count = batch_counts[j]; - test_stream_sparse(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); + test_stream_sparse(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups); } } - result = LGBM_DatasetFree(ref_datset_handle); + result = LGBM_DatasetFree(ref_dataset_handle); EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result; } diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 2eeba46f2869..b5e17991f63d 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -471,7 +471,7 @@ def test_classifier_custom_objective(output, task, cluster): assert_eq(p1_proba, p1_proba_local) -def test_machines_to_worker_map_unparseable_host_names(): +def test_machines_to_worker_map_unparsable_host_names(): workers = {"0.0.0.1:80": {}, "0.0.0.2:80": {}} machines = "0.0.0.1:80,0.0.0.2:80" with pytest.raises(ValueError, match="Could not parse host name from worker address '0.0.0.1:80'"): diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 9ae471e7f4b9..cb2e893c9612 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -660,7 +660,7 @@ def test_ranking_prediction_early_stopping(): # Simulates position bias for a given ranking dataset. -# The ouput dataset is identical to the input one with the exception for the relevance labels. +# The output dataset is identical to the input one with the exception for the relevance labels. # The new labels are generated according to an instance of a cascade user model: # for each query, the user is simulated to be traversing the list of documents ranked by a baseline ranker # (in our example it is simply the ordering by some feature correlated with relevance, e.g., 34) From ea04c66c86e31ebf68ec151d75c14fbdfb6ea681 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Tue, 3 Dec 2024 15:08:10 +0300 Subject: [PATCH 2/4] [docs] update installation guide (#6696) * Update Installation-Guide.rst * Update script.js * replace all Ninja sections with one paragraph --------- Co-authored-by: shiyu1994 --- docs/Installation-Guide.rst | 621 ++++++++++++++++++++++-------------- docs/_static/js/script.js | 4 +- 2 files changed, 389 insertions(+), 236 deletions(-) diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 41b84f9b82c2..1e28d037388d 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -1,17 +1,30 @@ Installation Guide ================== -This is a guide for building the LightGBM Command Line Interface (CLI). 
If you want to build the Python-package or R-package please refer to `Python-package`_ and `R-package`_ folders respectively. - All instructions below are aimed at compiling the 64-bit version of LightGBM. It is worth compiling the 32-bit version only in very rare special cases involving environmental limitations. The 32-bit version is slow and untested, so use it at your own risk and don't forget to adjust some of the commands below when installing. +By default, instructions below will use **VS Build Tools** or **make** tool to compile the code. +It is possible to use the `Ninja`_ tool instead of make on all platforms, but VS Build Tools cannot be replaced with Ninja. +You can add ``-G Ninja`` to CMake flags to use Ninja. + +By default, instructions below will produce a shared library file and an executable file with a command-line interface. +You can add ``-DBUILD_CLI=OFF`` to CMake flags to disable compilation of the executable. + If you need to build a static library instead of a shared one, you can add ``-DBUILD_STATIC_LIB=ON`` to CMake flags. +By default, instructions below will place header files into a system-wide folder. +You can add ``-DINSTALL_HEADERS=OFF`` to CMake flags to disable headers installation. + +By default, on macOS, CMake looks into Homebrew standard folders to find dependencies (e.g. OpenMP). +You can add ``-DUSE_HOMEBREW_FALLBACK=OFF`` to CMake flags to disable this behaviour. + Users who want to perform benchmarking can make LightGBM output time costs for different internal routines by adding ``-DUSE_TIMETAG=ON`` to CMake flags. -It is possible to build LightGBM in debug mode. In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM. +It is possible to build LightGBM in debug mode. +In this mode all compiler optimizations are disabled and LightGBM performs more checks internally. +To enable debug mode you can add ``-DUSE_DEBUG=ON`` to CMake flags or choose ``Debug_*`` configuration (e.g. ``Debug_DLL``, ``Debug_mpi``) in Visual Studio depending on how you are building LightGBM. .. _sanitizers: @@ -30,7 +43,7 @@ It is very useful to build `C++ unit tests <#build-c-unit-tests>`__ with sanitiz .. _nightly-builds: -You can also download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|. +You can download the artifacts of the latest successful build on master branch (nightly builds) here: |download artifacts|. .. contents:: **Contents** :depth: 1 @@ -40,12 +53,10 @@ You can also download the artifacts of the latest successful build on master bra Windows ~~~~~~~ -On Windows LightGBM can be built using +On Windows, LightGBM can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) @@ -54,22 +65,23 @@ Visual Studio (or VS Build Tools) With GUI ******** -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. 
Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ***************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -98,7 +110,7 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended that you use **Visual Studio** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -106,9 +118,17 @@ It is recommended that you use **Visual Studio** since it has better multithread Linux ~~~~~ -On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. +On Linux, LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -1. Install `CMake`_. +gcc +^^^ + +1. Install `CMake`_ and **gcc**. 2. Run the following commands: @@ -119,53 +139,69 @@ On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**. cmake -B build -S . cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +^^^^^ -Using ``Ninja`` -^^^^^^^^^^^^^^^ +1. Install `CMake`_, **Clang** and **OpenMP**. -On Linux, LightGBM can also be built with `Ninja `__ instead of ``make``. +2. Run the following commands: -.. code:: sh + .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -G 'Ninja' - cmake --build build -j2 + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . 
+ cmake --build build -j4 macOS ~~~~~ -On macOS LightGBM can be installed using **Homebrew**, or can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, LightGBM can be installed using -Apple Clang -^^^^^^^^^^^ +- **Homebrew**; +- **MacPorts**; + +or can be built using -Only **Apple Clang** version 8.1 or higher is supported. +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. Install Using ``Homebrew`` -************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: sh brew install lightgbm -Build from GitHub -***************** +Refer to https://formulae.brew.sh/formula/lightgbm for more details. -1. Install `CMake`_ : +Install Using ``MacPorts`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ - .. code:: sh +.. code:: sh - brew install cmake + sudo port install LightGBM + +Refer to https://ports.macports.org/port/LightGBM for more details. + +**Note**: Port for LightGBM is not maintained by LightGBM's maintainers. -2. Install **OpenMP**: +Build from GitHub +^^^^^^^^^^^^^^^^^ + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. + +Apple Clang +*********** + +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake libomp -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -175,21 +211,15 @@ Build from GitHub cmake --build build -j4 gcc -^^^ - -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake +*** -2. Install **gcc**: +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake gcc -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -213,12 +243,10 @@ You can build LightGBM without OpenMP support but it is **strongly not recommend Windows ^^^^^^^ -On Windows a version of LightGBM without OpenMP support can be built using +On Windows, a version of LightGBM without OpenMP support can be built using - **Visual Studio**; - - **CMake** and **VS Build Tools**; - - **CMake** and **MinGW**. Visual Studio (or VS Build Tools) @@ -227,26 +255,27 @@ Visual Studio (or VS Build Tools) With GUI -------- -1. Install `Visual Studio`_ (2015 or newer). +1. Install `Visual Studio`_. 2. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -3. Go to ``LightGBM-master/windows`` folder. +3. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -4. Open ``LightGBM.sln`` file with **Visual Studio**. +4. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release`` configuration if you need executable file or ``DLL`` configuration if you need shared library. -5. Go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. +5. Go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``C/C++`` -> ``Language`` and change the ``OpenMP Support`` property to ``No (/openmp-)``. -6. Get back to the project's main screen, then choose ``Release`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +6. Get back to the project's main screen and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. 
+ If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release`` folder. +The ``.dll`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/DLL`` folder. From Command Line ----------------- -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -275,14 +304,36 @@ MinGW-w64 The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder. -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. Linux ^^^^^ -On Linux a version of LightGBM without OpenMP support can be built using **CMake** and **gcc** or **Clang**. +On Linux, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_ and **gcc**. + +2. Run the following commands: + + .. code:: sh -1. Install `CMake`_. + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_OPENMP=OFF + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_ and **Clang**. 2. Run the following commands: @@ -290,20 +341,24 @@ On Linux a version of LightGBM without OpenMP support can be built using **CMake git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine cmake -B build -S . -DUSE_OPENMP=OFF cmake --build build -j4 macOS ^^^^^ -On macOS a version of LightGBM without OpenMP support can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a version of LightGBM without OpenMP support can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_: .. code:: sh @@ -321,19 +376,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install cmake + brew install cmake gcc -2. Install **gcc**: - - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -354,35 +403,36 @@ If you need to run a distributed learning application with high performance comm Windows ^^^^^^^ -On Windows an MPI version of LightGBM can be built using +On Windows, an MPI version of LightGBM can be built using - **MS MPI** and **Visual Studio**; - - **MS MPI**, **CMake** and **VS Build Tools**. 
+**Note**: Building MPI version by **MinGW** is not supported due to the miss of MPI library in it. + With GUI ******** 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed. -2. Install `Visual Studio`_ (2015 or newer). +2. Install `Visual Studio`_. 3. Navigate to one of the releases at https://github.com/microsoft/LightGBM/releases, download ``LightGBM-complete_source_code_zip.zip``, and unzip it. -4. Go to ``LightGBM-master/windows`` folder. +4. Go to ``LightGBM-complete_source_code_zip/windows`` folder. -5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``BUILD`` -> ``Build Solution (Ctrl+Shift+B)``. +5. Open ``LightGBM.sln`` file with **Visual Studio**, choose ``Release_mpi`` configuration and click ``Build`` -> ``Build Solution (Ctrl+Shift+B)``. - If you have errors about **Platform Toolset**, go to ``PROJECT`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. + If you have errors about **Platform Toolset**, go to ``Project`` -> ``Properties`` -> ``Configuration Properties`` -> ``General`` and select the toolset installed on your machine. -The ``.exe`` file will be in ``LightGBM-master/windows/x64/Release_mpi`` folder. +The ``.exe`` file will be in ``LightGBM-complete_source_code_zip/windows/x64/Release_mpi`` folder. From Command Line ***************** 1. You need to install `MS MPI`_ first. Both ``msmpisdk.msi`` and ``msmpisetup.exe`` are needed. -2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +2. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 3. Run the following commands: @@ -395,18 +445,22 @@ From Command Line The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder. -**Note**: Building MPI version by **MinGW** is not supported due to the miss of MPI library in it. - Linux ^^^^^ -On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **gcc** or **Clang**. +On Linux, an MPI version of LightGBM can be built using -1. Install `Open MPI`_. +- **CMake**, **gcc** and **Open MPI**; +- **CMake**, **Clang** and **Open MPI**. -2. Install `CMake`_. +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -3. Run the following commands: +gcc +*** + +1. Install `CMake`_, **gcc** and `Open MPI`_. + +2. Run the following commands: .. code:: sh @@ -415,37 +469,41 @@ On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** a cmake -B build -S . -DUSE_MPI=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - -macOS -^^^^^ +Clang +***** -On macOS an MPI version of LightGBM can be built using **Open MPI**, **CMake** and **Apple Clang** or **gcc**. +1. Install `CMake`_, **Clang**, **OpenMP** and `Open MPI`_. -Apple Clang -*********** +2. Run the following commands: -Only **Apple Clang** version 8.1 or higher is supported. + .. code:: sh -1. Install `CMake`_ : + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_MPI=ON + cmake --build build -j4 - .. 
code:: sh +macOS +^^^^^ - brew install cmake +On macOS, an MPI version of LightGBM can be built using -2. Install **OpenMP**: +- **CMake**, **Open MPI** and **Apple Clang**; +- **CMake**, **Open MPI** and **gcc**. - .. code:: sh +After compilation the executable and ``.dylib`` files will be in ``LightGBM/`` folder. - brew install libomp +Apple Clang +*********** -3. Install **Open MPI**: +1. Install `CMake`_, **OpenMP** and `Open MPI`_: .. code:: sh - brew install open-mpi + brew install cmake libomp open-mpi -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -457,25 +515,13 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : - - .. code:: sh - - brew install cmake - -2. Install **gcc**: +1. Install `CMake`_, `Open MPI`_ and **gcc**: .. code:: sh - brew install gcc + brew install cmake open-mpi gcc -3. Install **Open MPI**: - - .. code:: sh - - brew install open-mpi - -4. Run the following commands: +2. Run the following commands: .. code:: sh @@ -488,48 +534,19 @@ gcc Build GPU Version ~~~~~~~~~~~~~~~~~ -Linux -^^^^^ - -On Linux a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **gcc** or **Clang**. - -The following dependencies should be installed before compilation: - -- **OpenCL** 1.2 headers and libraries, which is usually provided by GPU manufacture. - - The generic OpenCL ICD packages (for example, Debian package ``ocl-icd-libopencl1`` and ``ocl-icd-opencl-dev``) can also be used. - -- **libboost** 1.56 or later (1.61 or later is recommended). - - We use Boost.Compute as the interface to GPU, which is part of the Boost library since version 1.61. However, since we include the source code of Boost.Compute as a submodule, we only require the host has Boost 1.56 or later installed. We also use Boost.Align for memory allocation. Boost.Compute requires Boost.System and Boost.Filesystem to store offline kernel cache. - - The following Debian packages should provide necessary Boost libraries: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``. - -- **CMake** - -To build LightGBM GPU version, run the following commands: - -.. code:: sh - - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_GPU=1 - # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ - cmake --build build - -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). - Windows ^^^^^^^ -On Windows a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **OpenCL**, **Boost**, **CMake** and **VS Build Tools**; +- **OpenCL**, **Boost**, **CMake** and **MinGW**. If you use **MinGW**, the build procedure is similar to the build on Linux. Following procedure is for the **MSVC** (Microsoft Visual C++) build. -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is installed). +1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is installed). 2. Install **OpenCL** for Windows. The installation depends on the brand (NVIDIA, AMD, Intel) of your GPU card. @@ -559,13 +576,68 @@ Following procedure is for the **MSVC** (Microsoft Visual C++) build. git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 + cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: - # cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include" + # cmake -B build -S . -A x64 -DUSE_GPU=ON -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include" cmake --build build --target ALL_BUILD --config Release **Note**: ``C:/local/boost_1_63_0`` and ``C:/local/boost_1_63_0/lib64-msvc-14.0`` are locations of your **Boost** binaries (assuming you've downloaded 1.63.0 version for Visual Studio 2015). +The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder. + +Linux +^^^^^ + +On Linux, a GPU version of LightGBM (``device_type=gpu``) can be built using + +- **CMake**, **OpenCL**, **Boost** and **gcc**; +- **CMake**, **OpenCL**, **Boost** and **Clang**. + +**OpenCL** headers and libraries are usually provided by GPU manufacture. +The generic OpenCL ICD packages (for example, Debian packages ``ocl-icd-libopencl1``, ``ocl-icd-opencl-dev``, ``pocl-opencl-icd``) can also be used. + +Required **Boost** libraries (Boost.Align, Boost.System, Boost.Filesystem, Boost.Chrono) should be provided by the following Debian packages: ``libboost-dev``, ``libboost-system-dev``, ``libboost-filesystem-dev``, ``libboost-chrono-dev``. + +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. + +gcc +*** + +1. Install `CMake`_, **gcc**, **OpenCL** and **Boost**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP**, **OpenCL** and **Boost**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . 
-DUSE_GPU=ON + # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: + # cmake -B build -S . -DUSE_GPU=ON -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ + cmake --build build -j4 + +macOS +^^^^^ + +The GPU version is not supported on macOS. + Docker ^^^^^^ @@ -574,60 +646,84 @@ Refer to `GPU Docker folder `__ of LightGBM (``device_type=gpu``) is based on OpenCL. +The `original GPU version <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL. -The CUDA-based build (``device_type=cuda``) is a separate implementation. +The CUDA-based version (``device_type=cuda``) is a separate implementation. Use this version in Linux environments with an NVIDIA GPU with compute capability 6.0 or higher. +Windows +^^^^^^^ + +The CUDA version is not supported on Windows. +Use the `GPU version <#build-gpu-version>`__ (``device_type=gpu``) for GPU acceleration on Windows. + Linux ^^^^^ -On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and **gcc** or **Clang**. +On Linux, a CUDA version of LightGBM can be built using -The following dependencies should be installed before compilation: +- **CMake**, **gcc** and **CUDA**; +- **CMake**, **Clang** and **CUDA**. -- **CUDA** 11.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers. +Please refer to `this detailed guide`_ for **CUDA** libraries installation. -- **CMake** +After compilation the executable and ``.so`` files will be in ``LightGBM/`` folder. -To build LightGBM CUDA version, run the following commands: +gcc +*** -.. code:: sh +1. Install `CMake`_, **gcc** and **CUDA**. + +2. Run the following commands: - git clone --recursive https://github.com/microsoft/LightGBM - cd LightGBM - cmake -B build -S . -DUSE_CUDA=1 - cmake --build build -j4 + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 + +Clang +***** + +1. Install `CMake`_, **Clang**, **OpenMP** and **CUDA**. + +2. Run the following commands: -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_CUDA=ON + cmake --build build -j4 macOS ^^^^^ The CUDA version is not supported on macOS. -Windows -^^^^^^^ - -The CUDA version is not supported on Windows. -Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows. - Build Java Wrapper ~~~~~~~~~~~~~~~~~~ Using the following instructions you can generate a JAR file containing the LightGBM `C API <./Development-Guide.rst#c-api>`__ wrapped by **SWIG**. +After compilation the ``.jar`` file will be in ``LightGBM/build`` folder. + Windows ^^^^^^^ -On Windows a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **VS Build Tools** or **MinGW**. +On Windows, a Java wrapper of LightGBM can be built using + +- **Java**, **SWIG**, **CMake** and **VS Build Tools**; +- **Java**, **SWIG**, **CMake** and **MinGW**. VS Build Tools ************** -1. 
Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -638,14 +734,12 @@ VS Build Tools cmake -B build -S . -A x64 -DUSE_SWIG=ON cmake --build build --target ALL_BUILD --config Release -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/Release`` folder. - MinGW-w64 ********* 1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. -2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +2. Install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 3. Run the following commands: @@ -656,9 +750,7 @@ MinGW-w64 cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON cmake --build build -j4 -The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/`` folder. - -**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time if you encounter the ``sh.exe was found in your PATH`` error. +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. It is recommended to use **VS Build Tools (Visual Studio)** since it has better multithreading efficiency in **Windows** for many-core systems (see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__). @@ -666,9 +758,15 @@ It is recommended to use **VS Build Tools (Visual Studio)** since it has better Linux ^^^^^ -On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **gcc** or **Clang**. +On Linux, a Java wrapper of LightGBM can be built using + +- **CMake**, **gcc**, **Java** and **SWIG**; +- **CMake**, **Clang**, **Java** and **SWIG**. + +gcc +*** -1. Install `CMake`_, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). +1. Install `CMake`_, **gcc**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). 2. Run the following commands: @@ -679,34 +777,40 @@ On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMa cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 -**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this). +Clang +***** -macOS -^^^^^ +1. Install `CMake`_, **Clang**, **OpenMP**, `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly). -On macOS a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMake** and **Apple Clang** or **gcc**. +2. Run the following commands: -First, install `SWIG`_ and **Java** (also make sure that ``JAVA_HOME`` is set properly). -Then, either follow the **Apple Clang** or **gcc** installation instructions below. + .. 
code:: sh -Apple Clang -*********** + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DUSE_SWIG=ON + cmake --build build -j4 -Only **Apple Clang** version 8.1 or higher is supported. +macOS +^^^^^ -1. Install `CMake`_ : +On macOS, a Java wrapper of LightGBM can be built using - .. code:: sh +- **CMake**, **Java**, **SWIG** and **Apple Clang**; +- **CMake**, **Java**, **SWIG** and **gcc**. - brew install cmake +Apple Clang +*********** -2. Install **OpenMP**: +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **OpenMP**: .. code:: sh - brew install libomp + brew install cmake openjdk swig libomp + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -718,19 +822,14 @@ Only **Apple Clang** version 8.1 or higher is supported. gcc *** -1. Install `CMake`_ : +1. Install `CMake`_, **Java** (also make sure that ``JAVA_HOME`` environment variable is set properly), `SWIG`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: - - .. code:: sh - - brew install gcc + brew install cmake openjdk swig gcc + export JAVA_HOME="$(brew --prefix openjdk)/libexec/openjdk.jdk/Contents/Home/" -3. Run the following commands: +2. Run the following commands: .. code:: sh @@ -740,15 +839,31 @@ gcc cmake -B build -S . -DUSE_SWIG=ON cmake --build build -j4 +Build Python-package +~~~~~~~~~~~~~~~~~~~~ + +Refer to `Python-package folder `__. + +Build R-package +~~~~~~~~~~~~~~~ + +Refer to `R-package folder `__. + Build C++ Unit Tests ~~~~~~~~~~~~~~~~~~~~ Windows ^^^^^^^ -On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Build Tools**. +On Windows, C++ unit tests of LightGBM can be built using + +- **CMake** and **VS Build Tools**; +- **CMake** and **MinGW**. + +VS Build Tools +************** -1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** (2015 or newer) is already installed). +1. Install `Git for Windows`_, `CMake`_ and `VS Build Tools`_ (**VS Build Tools** is not needed if **Visual Studio** is already installed). 2. Run the following commands: @@ -756,17 +871,43 @@ On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Bui git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm --config Debug The ``.exe`` file will be in ``LightGBM/Debug`` folder. +MinGW-w64 +********* + +1. Install `Git for Windows`_, `CMake`_ and `MinGW-w64`_. + +2. Run the following commands: + + .. code:: console + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +The ``.exe`` file will be in ``LightGBM/`` folder. + +**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DBUILD_CPP_TEST=ON`` one more time or add ``-DCMAKE_SH=CMAKE_SH-NOTFOUND`` to CMake flags if you encounter the ``sh.exe was found in your PATH`` error. + Linux ^^^^^ -On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** or **Clang**. 
+On Linux, a C++ unit tests of LightGBM can be built using + +- **CMake** and **gcc**; +- **CMake** and **Clang**. + +After compilation the executable file will be in ``LightGBM/`` folder. + +gcc +*** -1. Install `CMake`_. +1. Install `CMake`_ and **gcc**. 2. Run the following commands: @@ -774,24 +915,42 @@ On Linux a C++ unit tests of LightGBM can be built using **CMake** and **gcc** o git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON + cmake --build build --target testlightgbm -j4 + +Clang +***** + +1. Install `CMake`_, **Clang** and **OpenMP**. + +2. Run the following commands: + + .. code:: sh + + git clone --recursive https://github.com/microsoft/LightGBM + cd LightGBM + export CXX=clang++-14 CC=clang-14 # replace "14" with version of Clang installed on your machine + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 macOS ^^^^^ -On macOS a C++ unit tests of LightGBM can be built using **CMake** and **Apple Clang** or **gcc**. +On macOS, a C++ unit tests of LightGBM can be built using + +- **CMake** and **Apple Clang**; +- **CMake** and **gcc**. + +After compilation the executable file will be in ``LightGBM/`` folder. Apple Clang *********** -Only **Apple Clang** version 8.1 or higher is supported. - -1. Install `CMake`_ : +1. Install `CMake`_ and **OpenMP**: .. code:: sh - brew install cmake + brew install cmake libomp 2. Run the following commands: @@ -799,42 +958,32 @@ Only **Apple Clang** version 8.1 or higher is supported. git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 gcc *** -1. Install `CMake`_ : +1. Install `CMake`_ and **gcc**: .. code:: sh - brew install cmake - -2. Install **gcc**: + brew install cmake gcc - .. code:: sh - - brew install gcc - -3. Run the following commands: +2. Run the following commands: .. code:: sh git clone --recursive https://github.com/microsoft/LightGBM cd LightGBM export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine - cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF + cmake -B build -S . -DBUILD_CPP_TEST=ON cmake --build build --target testlightgbm -j4 .. |download artifacts| image:: ./_static/images/artifacts-not-available.svg :target: https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html -.. _Python-package: https://github.com/microsoft/LightGBM/tree/master/python-package - -.. _R-package: https://github.com/microsoft/LightGBM/tree/master/R-package - .. _Visual Studio: https://visualstudio.microsoft.com/downloads/ .. _Git for Windows: https://git-scm.com/download/win @@ -864,3 +1013,5 @@ gcc .. _this detailed guide: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html .. _following docs: https://github.com/google/sanitizers/wiki + +.. 
_Ninja: https://ninja-build.org diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index c4717b8a0ee5..c6d21713fe5c 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -15,7 +15,7 @@ $(() => { /* Collapse specified sections in the installation guide */ if (window.location.pathname.toLocaleLowerCase().indexOf("installation-guide") !== -1) { $( - '', + '', ).appendTo("body"); const collapsible = [ "#build-threadless-version-not-recommended", @@ -23,6 +23,8 @@ $(() => { "#build-gpu-version", "#build-cuda-version", "#build-java-wrapper", + "#build-python-package", + "#build-r-package", "#build-c-unit-tests", ]; $.each(collapsible, (_, val) => { From 6e0b0a8be44b14ade10737288a26aa361a00a18e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 3 Dec 2024 20:05:31 -0600 Subject: [PATCH 3/4] [python-package] simplify scikit-learn 1.6+ tags support (#6735) --- python-package/lightgbm/compat.py | 10 ---------- python-package/lightgbm/sklearn.py | 15 +++++---------- tests/python_package_test/test_sklearn.py | 6 ++++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 0b9444b0ecbf..96dee6522572 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -14,14 +14,6 @@ from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import assert_all_finite, check_array, check_X_y - # sklearn.utils Tags types can be imported unconditionally once - # lightgbm's minimum scikit-learn version is 1.6 or higher - try: - from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags - from sklearn.utils import RegressorTags as _sklearn_RegressorTags - except ImportError: - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None try: from sklearn.exceptions import NotFittedError from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold @@ -148,8 +140,6 @@ class _LGBMRegressorBase: # type: ignore _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None _LGBMValidateData = None - _sklearn_ClassifierTags = None - _sklearn_RegressorTags = None _sklearn_version = None # additional scikit-learn imports only for type hints diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index d730b66c3556..108ef1e14498 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -40,8 +40,6 @@ _LGBMModelBase, _LGBMRegressorBase, _LGBMValidateData, - _sklearn_ClassifierTags, - _sklearn_RegressorTags, _sklearn_version, dt_DataTable, pd_DataFrame, @@ -726,7 +724,7 @@ def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: # take whatever tags are provided by BaseEstimator, then modify # them with LightGBM-specific values return self._update_sklearn_tags_from_dict( - tags=_LGBMModelBase.__sklearn_tags__(self), + tags=super().__sklearn_tags__(), tags_dict=self._more_tags(), ) @@ -1298,10 +1296,7 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "regressor" - tags.regressor_tags = _sklearn_RegressorTags(multi_label=False) - return tags + return super().__sklearn_tags__() def fit( # type: ignore[override] self, @@ -1360,9 +1355,9 @@ def _more_tags(self) -> Dict[str, Any]: return tags def __sklearn_tags__(self) -> "_sklearn_Tags": - tags = LGBMModel.__sklearn_tags__(self) - tags.estimator_type = "classifier" - 
tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False) + tags = super().__sklearn_tags__() + tags.classifier_tags.multi_class = True + tags.classifier_tags.multi_label = False return tags def fit( # type: ignore[override] diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index d187e9df5a9f..1cdd047f1857 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -1488,6 +1488,12 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.target_tags.one_d_labels is True + if estimator_class is lgb.LGBMClassifier: + assert sklearn_tags.estimator_type == "classifier" + assert sklearn_tags.classifier_tags.multi_class is True + assert sklearn_tags.classifier_tags.multi_label is False + elif estimator_class is lgb.LGBMRegressor: + assert sklearn_tags.estimator_type == "regressor" @pytest.mark.parametrize("task", all_tasks) From d4d6c87db02a146ac6dc04b00f538e02a3b22250 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 5 Dec 2024 10:24:35 -0600 Subject: [PATCH 4/4] [c++] include wherever uint8_t is used (#6736) --- include/LightGBM/bin.h | 1 + include/LightGBM/cuda/cuda_column_data.hpp | 1 + include/LightGBM/cuda/cuda_row_data.hpp | 1 + include/LightGBM/dataset.h | 1 + include/LightGBM/feature_group.h | 1 + include/LightGBM/train_share_states.h | 1 + include/LightGBM/tree.h | 1 + src/c_api.cpp | 1 + src/io/cuda/cuda_column_data.cpp | 2 ++ src/io/json11.cpp | 1 + 10 files changed, 11 insertions(+) diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index a33fcfa9c45c..5826f2387102 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/cuda/cuda_column_data.hpp b/include/LightGBM/cuda/cuda_column_data.hpp index 314a178859c6..8875cd151d7d 100644 --- a/include/LightGBM/cuda/cuda_column_data.hpp +++ b/include/LightGBM/cuda/cuda_column_data.hpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace LightGBM { diff --git a/include/LightGBM/cuda/cuda_row_data.hpp b/include/LightGBM/cuda/cuda_row_data.hpp index 1d4cb2f73b1e..85da72bc083d 100644 --- a/include/LightGBM/cuda/cuda_row_data.hpp +++ b/include/LightGBM/cuda/cuda_row_data.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #define COPY_SUBROW_BLOCK_SIZE_ROW_DATA (1024) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index ef214b7cd89d..c2a4b62296f2 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index f13a5fff966f..bcc0388ba507 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/include/LightGBM/train_share_states.h b/include/LightGBM/train_share_states.h index f102668edf70..e4f4e4afea5f 100644 --- a/include/LightGBM/train_share_states.h +++ b/include/LightGBM/train_share_states.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index c28ddd140c48..bc5af621e402 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -8,6 +8,7 @@ #include #include 
+#include #include #include #include diff --git a/src/c_api.cpp b/src/c_api.cpp index 98748bc9ff2f..cf6577ad5e2c 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/src/io/cuda/cuda_column_data.cpp b/src/io/cuda/cuda_column_data.cpp index eb0938c01225..415578847f07 100644 --- a/src/io/cuda/cuda_column_data.cpp +++ b/src/io/cuda/cuda_column_data.cpp @@ -7,6 +7,8 @@ #include +#include + namespace LightGBM { CUDAColumnData::CUDAColumnData(const data_size_t num_data, const int gpu_device_id) { diff --git a/src/io/json11.cpp b/src/io/json11.cpp index 32a9c9d718b7..acd09f9ecb12 100644 --- a/src/io/json11.cpp +++ b/src/io/json11.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include
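The commit above adds an include wherever ``uint8_t`` is used; the header in question is presumably ``<cstdint>``, which declares the fixed-width integer types. A minimal sketch (not part of the patch, assuming ``<cstdint>`` is indeed the added header) of the rule being enforced: any translation unit that uses fixed-width integer types should include ``<cstdint>`` itself rather than rely on another header pulling it in transitively.

.. code:: cpp

    // Minimal illustrative sketch, assuming the added header is <cstdint>:
    // include it directly wherever fixed-width types such as uint8_t are used,
    // instead of depending on transitive includes from other headers.
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<uint8_t> bins(256, 0);  // uint8_t is declared in <cstdint>
      int32_t total = 0;                  // so is int32_t
      for (uint8_t b : bins) {
        total += b;
      }
      return total == 0 ? 0 : 1;
    }

Relying on transitive includes is fragile: a refactor of an unrelated header (like the ones touched in this patch) can silently drop the definition and break the build on some toolchains.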