From 5e3d3cae946a40f1c8135172d3411108d3153427 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Thu, 18 Feb 2021 23:57:50 -0600
Subject: [PATCH] revert changes from #4000

---
 CMakeLists.txt                                   |  2 +-
 README.md                                        |  2 +-
 examples/binary_classification/train.conf        |  6 +++---
 examples/binary_classification/train_linear.conf |  6 +++---
 examples/lambdarank/train.conf                   |  6 +++---
 examples/parallel_learning/README.md             |  7 ++++---
 examples/parallel_learning/train.conf            |  6 +++---
 examples/regression/train.conf                   |  6 +++---
 examples/xendcg/train.conf                       |  6 +++---
 include/LightGBM/config.h                        | 10 +++++-----
 include/LightGBM/dataset.h                       |  2 +-
 python-package/lightgbm/basic.py                 |  2 +-
 src/application/application.cpp                  |  2 +-
 src/io/config.cpp                                |  2 +-
 src/io/dataset_loader.cpp                        |  2 +-
 src/io/metadata.cpp                              |  4 ++--
 16 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29a786d3a506..3273ff135d81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-OPTION(USE_MPI "Enable MPI-based distributed learning" OFF)
+OPTION(USE_MPI "Enable MPI-based parallel learning" OFF)
 OPTION(USE_OPENMP "Enable OpenMP" ON)
 OPTION(USE_GPU "Enable GPU-accelerated training" OFF)
 OPTION(USE_SWIG "Enable SWIG to generate Java API" OFF)
diff --git a/README.md b/README.md
index a0dd5c55a899..c8554cb1ef97 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ Next you may want to read:
 - [**Examples**](https://github.com/microsoft/LightGBM/tree/master/examples) showing command line usage of common tasks.
 - [**Features**](https://github.com/microsoft/LightGBM/blob/master/docs/Features.rst) and algorithms supported by LightGBM.
 - [**Parameters**](https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst) is an exhaustive list of customization you can make.
-- [**Distributed Learning Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst) and [**GPU Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/GPU-Tutorial.rst) can speed up computation.
+- [**Parallel Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst) and [**GPU Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/GPU-Tutorial.rst) can speed up computation.
 - [**Laurae++ interactive documentation**](https://sites.google.com/view/lauraepp/parameters) is a detailed guide for hyperparameters.
 - [**Optuna Hyperparameter Tuner**](https://medium.com/optuna/lightgbm-tuner-new-optuna-integration-for-hyperparameter-optimization-8b7095e99258) provides automated tuning for LightGBM hyperparameters ([code examples](https://github.com/optuna/optuna/blob/master/examples/)).
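For orientation, the `num_machines`, `local_listen_port`, and `machine_list_file` keys reverted in the CLI configs below map onto the same network parameters that the Python package accepts. The sketch that follows is an illustration only and not part of the patch: the IP addresses and port are placeholders, it assumes the socket-based (non-MPI) build toggled by `USE_MPI` above is *not* used, and the script would have to be launched on every listed machine with that machine's own data partition before training could proceed.

```python
import numpy as np
import lightgbm as lgb

# Placeholder cluster description -- replace with the real "ip:port" of each worker.
MACHINES = "10.0.0.1:12400,10.0.0.2:12400"

params = {
    "objective": "binary",
    "tree_learner": "data",       # data-parallel tree learner (alias: data_parallel)
    "num_machines": 2,            # alias: num_machine
    "local_listen_port": 12400,   # alias: local_port
    "machines": MACHINES,         # socket version: comma-separated ip:port list
}

# Each machine trains on its own partition of the data.
X = np.random.rand(1000, 20)
y = np.random.randint(0, 2, size=1000)

booster = lgb.train(params, lgb.Dataset(X, y), num_boost_round=10)
```

Run on a single machine, this call would simply block waiting for the second peer, which is why the example configs in this patch ship a `mlist.txt` listing all participating hosts.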
diff --git a/examples/binary_classification/train.conf b/examples/binary_classification/train.conf
index f9788aae592a..e4ca69b1dcd3 100644
--- a/examples/binary_classification/train.conf
+++ b/examples/binary_classification/train.conf
@@ -98,13 +98,13 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 1
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
 
 # force splits
diff --git a/examples/binary_classification/train_linear.conf b/examples/binary_classification/train_linear.conf
index e47cc58cd124..616d5fc39e35 100644
--- a/examples/binary_classification/train_linear.conf
+++ b/examples/binary_classification/train_linear.conf
@@ -100,13 +100,13 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 1
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
 
 # force splits
diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf
index 16192f222f7f..d3ead83f8d5c 100644
--- a/examples/lambdarank/train.conf
+++ b/examples/lambdarank/train.conf
@@ -103,11 +103,11 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 1
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
diff --git a/examples/parallel_learning/README.md b/examples/parallel_learning/README.md
index e4252452c335..d95805b39db5 100644
--- a/examples/parallel_learning/README.md
+++ b/examples/parallel_learning/README.md
@@ -1,7 +1,8 @@
 Distributed Learning Example
 ============================
+
-Here is an example for LightGBM to perform distributed learning for 2 machines.
+Here is an example for LightGBM to perform parallel learning for 2 machines.
 
 1. Edit [mlist.txt](./mlist.txt): write the ip of these 2 machines that you want to run application on.
 
@@ -16,6 +17,6 @@ Here is an example for LightGBM to perform distributed learning for 2 machines.
 
 ```"./lightgbm" config=train.conf```
 
-This distributed learning example is based on socket. LightGBM also supports distributed learning based on mpi.
+This parallel learning example is based on socket. LightGBM also supports parallel learning based on mpi.
 
-For more details about the usage of distributed learning, please refer to [this](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst).
+For more details about the usage of parallel learning, please refer to [this](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst).
diff --git a/examples/parallel_learning/train.conf b/examples/parallel_learning/train.conf
index dbc58b8234c8..6076a80887ca 100644
--- a/examples/parallel_learning/train.conf
+++ b/examples/parallel_learning/train.conf
@@ -98,11 +98,11 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 2
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
diff --git a/examples/regression/train.conf b/examples/regression/train.conf
index 7fac419a5ba4..b62e99d7dc27 100644
--- a/examples/regression/train.conf
+++ b/examples/regression/train.conf
@@ -101,11 +101,11 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 1
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
diff --git a/examples/xendcg/train.conf b/examples/xendcg/train.conf
index c98870ed8461..4715841ca64f 100644
--- a/examples/xendcg/train.conf
+++ b/examples/xendcg/train.conf
@@ -104,11 +104,11 @@ output_model = LightGBM_model.txt
 # output_result= prediction.txt
 
-# number of machines in distributed training, alias: num_machine
+# number of machines in parallel training, alias: num_machine
 num_machines = 1
 
-# local listening port in distributed training, alias: local_port
+# local listening port in parallel training, alias: local_port
 local_listen_port = 12400
 
-# machines list file for distributed training, alias: mlist
+# machines list file for parallel training, alias: mlist
 machine_list_file = mlist.txt
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index b3b46b05656b..4d0686912091 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -200,7 +200,7 @@ struct Config {
   // desc = ``feature``, feature parallel tree learner, aliases: ``feature_parallel``
   // desc = ``data``, data parallel tree learner, aliases: ``data_parallel``
   // desc = ``voting``, voting parallel tree learner, aliases: ``voting_parallel``
-  // desc = refer to `Distributed Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
+  // desc = refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
   std::string tree_learner = "serial";
 
   // alias = num_thread, nthread, nthreads, n_jobs
   // desc = number of threads for LightGBM
   // desc = for the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPUs use `hyper-threading <https://en.wikipedia.org/wiki/Hyper-threading>`__ to generate 2 threads per CPU core)
   // desc = do not set it too large if your dataset is small (for instance, do not use 64 threads for a dataset with 10,000 rows)
   // desc = be aware a task manager or any similar CPU monitoring tool might report that cores not being fully utilized. **This is normal**
-  // desc = for distributed learning, do not use all CPU cores because this will cause poor performance for the network communication
+  // desc = for parallel learning, do not use all CPU cores because this will cause poor performance for the network communication
   // desc = **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
   int num_threads = 0;
 
@@ -634,7 +634,7 @@ struct Config {
   bool feature_pre_filter = true;
 
   // alias = is_pre_partition
-  // desc = used for distributed learning (excluding the ``feature_parallel`` mode)
+  // desc = used for parallel learning (excluding the ``feature_parallel`` mode)
   // desc = ``true`` if training data are pre-partitioned, and different machines use different partitions
   bool pre_partition = false;
 
@@ -961,7 +961,7 @@ struct Config {
   // check = >0
   // alias = num_machine
-  // desc = the number of machines for distributed learning application
+  // desc = the number of machines for parallel learning application
   // desc = this parameter is needed to be set in both **socket** and **mpi** versions
   int num_machines = 1;
 
@@ -976,7 +976,7 @@ struct Config {
   int time_out = 120;
 
   // alias = machine_list_file, machine_list, mlist
-  // desc = path of file that lists machines for this distributed learning application
+  // desc = path of file that lists machines for this parallel learning application
   // desc = each line contains one IP and one port for one machine. The format is ``ip port`` (space as a separator)
   // desc = **Note**: can be used only in CLI version
   std::string machine_list_filename = "";
diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h
index 61989e221bcc..90f48e70c744 100644
--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -80,7 +80,7 @@ class Metadata {
   /*!
   * \brief Partition meta data according to local used indices if need
-  * \param num_all_data Number of total training data, including other machines' data on distributed learning
+  * \param num_all_data Number of total training data, including other machines' data on parallel learning
   * \param used_data_indices Indices of local used training data
   */
   void CheckOrPartition(data_size_t num_all_data,
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 39a4d8e9da57..b036c4f81b2a 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2333,7 +2333,7 @@ def set_network(self, machines, local_listen_port=12400,
         listen_time_out : int, optional (default=120)
             Socket time-out in minutes.
         num_machines : int, optional (default=1)
-            The number of machines for distributed learning application.
+            The number of machines for parallel learning application.

         Returns
         -------
diff --git a/src/application/application.cpp b/src/application/application.cpp
index e82cfcada98f..62583db72b6c 100644
--- a/src/application/application.cpp
+++ b/src/application/application.cpp
@@ -105,7 +105,7 @@ void Application::LoadData() {
                                                 config_.num_class, config_.data.c_str());
   // load Training data
   if (config_.is_data_based_parallel) {
-    // load data for distributed training
+    // load data for parallel training
     train_data_.reset(dataset_loader.LoadFromFile(config_.data.c_str(),
                                                   Network::rank(), Network::num_machines()));
   } else {
diff --git a/src/io/config.cpp b/src/io/config.cpp
index fbb9e339933f..dc04cb972d23 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -374,7 +374,7 @@ void Config::CheckParamConflict() {
   }
   if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
     // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints.
-    Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in distributed learning, auto set to \"basic\" method.");
+    Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in parallel learning, auto set to \"basic\" method.");
     monotone_constraints_method = "basic";
   }
   if (feature_fraction_bynode != 1.0 && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp
index 545ffcaad849..05c65ee16744 100644
--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -183,7 +183,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
   // don't support query id in data file when training in parallel
   if (num_machines > 1 && !config_.pre_partition) {
     if (group_idx_ > 0) {
-      Log::Fatal("Using a query id without pre-partitioning the data file is not supported for distributed training.\n"
+      Log::Fatal("Using a query id without pre-partitioning the data file is not supported for parallel training.\n"
                  "Please use an additional query file or pre-partition the data");
     }
   }
diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp
index 63a1690906a2..8ab4da8d74f2 100644
--- a/src/io/metadata.cpp
+++ b/src/io/metadata.cpp
@@ -22,7 +22,7 @@ Metadata::Metadata() {
 
 void Metadata::Init(const char* data_filename) {
   data_filename_ = data_filename;
-  // for lambdarank, it needs query data for partition data in distributed learning
+  // for lambdarank, it needs query data for partition data in parallel learning
   LoadQueryBoundaries();
   LoadWeights();
   LoadQueryWeights();
@@ -187,7 +187,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data_size_t>& used_data_indices) {
 (used_data_indices.size());
 // check weights
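The `set_network` docstring touched above belongs to `Booster.set_network` in the Python package, the Python-side counterpart of the `num_machines` / `local_listen_port` parameters reverted throughout this patch. The following is a hedged sketch, not part of the patch: the host list is hypothetical, the call will not return until every listed peer is reachable (or the time-out expires), and `free_network` releases the sockets afterwards.

```python
import numpy as np
import lightgbm as lgb

# Train a tiny local model so there is a Booster object to attach to the network.
X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)
bst = lgb.train({"objective": "binary", "verbose": -1}, lgb.Dataset(X, y), num_boost_round=5)

# Hypothetical two-machine socket cluster; the ip:port entries are placeholders.
bst.set_network(
    machines=["10.0.0.1:12400", "10.0.0.2:12400"],
    local_listen_port=12400,
    listen_time_out=120,  # minutes, as documented above
    num_machines=2,
)

# ... distributed work (e.g. further training rounds) would happen here ...

bst.free_network()  # tear the socket network down again
```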