From e5b6f31042bbccb17edebf3707e97fd906cca2b1 Mon Sep 17 00:00:00 2001 From: robhowley Date: Mon, 1 Feb 2021 21:53:07 -0500 Subject: [PATCH 01/32] chore: ignore swig jni file --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 959462864825..04cbe70582fb 100644 --- a/.gitignore +++ b/.gitignore @@ -453,3 +453,6 @@ dask-worker-space/ *.pub *.rdp *_rsa + +# swig jni +*_swig.jnilib \ No newline at end of file From 77183433fb79ed5e0bf3c112dae969479cd2ccee Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 2 Feb 2021 15:35:30 -0500 Subject: [PATCH 02/32] chore: run tests on pr to position_debias --- .github/workflows/cuda.yml | 1 + .github/workflows/optional_checks.yml | 1 + .github/workflows/python_package.yml | 1 + .github/workflows/static_analysis.yml | 1 + 4 files changed, 4 insertions(+) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 4b47d9bef65a..849d2492a12a 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - master + - position_debias env: github_actions: 'true' diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index 2f6bd789b3b9..a24657e9b641 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -4,6 +4,7 @@ on: pull_request: branches: - master + - position_debias jobs: all-successful: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 22a9b58ef993..df7301af3daa 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - master + - position_debias env: CONDA_ENV: test-env diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 9f0faa12d1fd..a6400b5f9052 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -9,6 +9,7 @@ on: pull_request: branches: - master + - position_debias env: COMPILER: 'gcc' From 706300008ef949a4a151fafcc730853115c63bbc Mon Sep 17 00:00:00 2001 From: robert-howley-zocdoc Date: Wed, 3 Feb 2021 12:02:26 -0500 Subject: [PATCH 03/32] Update cuda.yml --- .github/workflows/cuda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 849d2492a12a..4b47d9bef65a 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,7 +7,6 @@ on: pull_request: branches: - master - - position_debias env: github_actions: 'true' From 482c1431b5a344b9dd15c9e4ef037ba96750d20b Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 3 Feb 2021 12:48:35 -0500 Subject: [PATCH 04/32] feat: crude copy paste --- src/objective/rank_objective.hpp | 138 ++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index a720a69a3148..85a721c13a37 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -101,7 +101,9 @@ class LambdarankNDCG : public RankingObjective { : RankingObjective(config), sigmoid_(config.sigmoid), norm_(config.lambdarank_norm), - truncation_level_(config.lambdarank_truncation_level) { + truncation_level_(config.lambdarank_truncation_level), + _position_bins(config.position_bins), + _eta(config.eta) { label_gain_ = config.label_gain; // initialize DCG calculator DCGCalculator::DefaultLabelGain(&label_gain_); @@ -252,6 +254,110 @@ class LambdarankNDCG : 
public RankingObjective { } } + void InitPositionBiases() { /// + i_biases_.resize(_position_bins); + i_biases_pow_.resize(_position_bins); + j_biases_.resize(_position_bins); + j_biases_pow_.resize(_position_bins); + for (size_t i = 0; i < _position_bins; ++i) { + i_biases_[i] = 1.0f; + i_biases_pow_[i] = 1.0f; + j_biases_[i] = 1.0f; + j_biases_pow_[i] = 1.0f; + } + } + + void InitPositionGradients() { /// + position_cnts_.resize(_position_bins); + position_scores_.resize(_position_bins); + position_lambdas_.resize(_position_bins); + i_costs_.resize(_position_bins); + j_costs_.resize(_position_bins); + for (size_t i = 0; i < _position_bins; ++i) { + position_cnts_[i] = 0LL; + position_scores_[i] = 0.0f; + position_lambdas_[i] = 0.0f; + i_costs_[i] = 0.0f; + j_costs_[i] = 0.0f; + } + + for (int i = 0; i < num_threads_; i++) { + position_cnts_buffer_.emplace_back(_position_bins, 0LL); + position_scores_buffer_.emplace_back(_position_bins, 0.0f); + position_lambdas_buffer_.emplace_back(_position_bins, 0.0f); + i_costs_buffer_.emplace_back(_position_bins, 0.0f); + j_costs_buffer_.emplace_back(_position_bins, 0.0f); + } + } + + void UpdatePositionBiases() const { + // accumulate the parallel results + for (int i = 0; i < num_threads_; i++) { + for (size_t j = 0; j < _position_bins; ++j) { + position_cnts_[j] += position_cnts_buffer_[i][j]; + position_scores_[j] += position_scores_buffer_[i][j]; + position_lambdas_[j] += position_lambdas_buffer_[i][j]; + i_costs_[j] += i_costs_buffer_[i][j]; + j_costs_[j] += j_costs_buffer_[i][j]; + } + } + + long long position_cnts_sum = 0LL; + for (size_t i = 0; i < _position_bins; ++i) { + position_cnts_sum += position_cnts_[i]; + } + std::cout << "" << std::endl; + std::cout << "eta: " << _eta << ", pair_cnt_sum: " << position_cnts_sum << std::endl; + std::cout << std::setw(10) << "position" + << std::setw(15) << "bias_i" + << std::setw(15) << "bias_j" + << std::setw(15) << "score" + << std::setw(15) << "lambda" + << std::setw(15) << "high_pair_cnt" + << std::setw(15) << "i_cost" + << std::setw(15) << "j_cost" + << std::endl; + for (size_t i = 0; i < _position_bins; ++i) { /// + std::cout << std::setw(10) << i + << std::setw(15) << i_biases_pow_[i] + << std::setw(15) << j_biases_pow_[i] + << std::setw(15) << position_scores_[i] / num_queries_ + << std::setw(15) << - position_lambdas_[i] / num_queries_ + << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum + << std::setw(15) << i_costs_[i] / position_cnts_sum + << std::setw(15) << j_costs_[i] / position_cnts_sum + << std::endl; + } + + // Update bias + for (size_t i = 0; i < _position_bins; ++i) { /// + i_biases_[i] = i_costs_[i] / i_costs_[0]; + i_biases_pow_[i] = pow(i_biases_[i], _eta); + } + for (size_t i = 0; i < _position_bins; ++i) { /// + j_biases_[i] = j_costs_[i] / j_costs_[0]; + j_biases_pow_[i] = pow(j_biases_[i], _eta); + } + // Clear Buffer + for (size_t i = 0; i < _position_bins; ++i) { /// + position_cnts_[i] = 0LL; + position_scores_[i] = 0.0f; + position_lambdas_[i] = 0.0f; + i_costs_[i] = 0.0f; + j_costs_[i] = 0.0f; + } + + for (int i = 0; i < num_threads_; i++) { + for (size_t j = 0; j < _position_bins; ++j) { + position_cnts_buffer_[i][j] = 0LL; + position_scores_buffer_[i][j] = 0.0f; + position_lambdas_buffer_[i][j] = 0.0f; + i_costs_buffer_[i][j] = 0.0f; + j_costs_buffer_[i][j] = 0.0f; + } + } + } + const char* GetName() const override { return "lambdarank"; } private: @@ -275,6 +381,36 @@ class LambdarankNDCG : public RankingObjective { double max_sigmoid_input_ = 50; /*! 
\brief Factor that covert score to bin in sigmoid table */
  double sigmoid_table_idx_factor_;
+
+  // bias correction variables
+  mutable std::vector<double> i_biases_;
+  /*! \brief pow position biases */
+  mutable std::vector<double> i_biases_pow_;
+
+  mutable std::vector<double> j_biases_;
+  /*! \brief pow position biases */
+  mutable std::vector<double> j_biases_pow_;
+
+  /*! \brief position cnts */
+  mutable std::vector<long long> position_cnts_;
+  mutable std::vector<std::vector<long long>> position_cnts_buffer_;
+  /*! \brief position scores */
+  mutable std::vector<double> position_scores_;
+  mutable std::vector<std::vector<double>> position_scores_buffer_;
+  /*! \brief position lambdas */
+  mutable std::vector<double> position_lambdas_;
+  mutable std::vector<std::vector<double>> position_lambdas_buffer_;
+  // mutable double position cost;
+  mutable std::vector<double> i_costs_;
+  mutable std::vector<std::vector<double>> i_costs_buffer_;
+
+  mutable std::vector<double> j_costs_;
+  mutable std::vector<std::vector<double>> j_costs_buffer_;
+
+  /*! \brief Number of exponent */
+  double _eta;
+  /*! \brief Number of positions */
+  size_t _position_bins;
 };

 /*!

From 4bae3adddf2e50fbf1a86b4ded4e69992391051f Mon Sep 17 00:00:00 2001
From: robhowley
Date: Wed, 3 Feb 2021 15:26:33 -0500
Subject: [PATCH 05/32] feat: add unbiased lambdamart config params

---
 docs/Parameters.rst       | 16 ++++++++++++++++
 include/LightGBM/config.h | 13 +++++++++++++
 src/io/config_auto.cpp    | 14 ++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 5a4880154611..a569ceffaed0 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -1023,6 +1023,22 @@ Objective Parameters

    - separate by ``,``

+- ``lambdarank_unbiased`` :raw-html:`<a id="lambdarank_unbiased" title="Permalink to this parameter" href="#lambdarank_unbiased">🔗︎</a>`, default = ``false``, type = bool
+
+   - used only in ``lambdarank`` application
+
+   - set this to ``true`` to use the position bias correction of `Unbiased LambdaMART <https://arxiv.org/pdf/1809.05818.pdf>`__
+
+- ``lambdarank_position_bins`` :raw-html:`<a id="lambdarank_position_bins" title="Permalink to this parameter" href="#lambdarank_position_bins">🔗︎</a>`, default = ``12``, type = int, constraints: ``lambdarank_position_bins > 0``
+
+   - used only in ``lambdarank`` application where ``lambdarank_unbiased = true``
+
+- ``lambdarank_eta`` :raw-html:`<a id="lambdarank_eta" title="Permalink to this parameter" href="#lambdarank_eta">🔗︎</a>`, default = ``0.5``, type = double, constraints: ``lambdarank_eta >= 0.0``
+
+   - used only in ``lambdarank`` application where ``lambdarank_unbiased = true``
+
+   - position bias ratio regularizer exponent will be set to ``1 / (1 + eta)``
+
 Metric Parameters
 -----------------

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 8b0800007c4a..4ee6a04869f7 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -879,6 +879,19 @@ struct Config {
   // desc = separate by ``,``
   std::vector<double> label_gain;

+  // desc = used only in ``lambdarank`` application
+  // desc = set this to ``true`` to use the position bias correction of `Unbiased LambdaMART <https://arxiv.org/pdf/1809.05818.pdf>`__
+  bool lambdarank_unbiased = false;
+
+  // check = >0
+  // desc = used only in ``lambdarank`` application where ``lambdarank_unbiased = true``
+  int lambdarank_position_bins = 12;
+
+  // check = >=0.0
+  // desc = used only in ``lambdarank`` application where ``lambdarank_unbiased = true``
+  // desc = position bias ratio regularizer exponent will be set to ``1 / (1 + eta)``
+  double lambdarank_eta = 0.5;
+
 #pragma endregion

 #pragma region Metric Parameters

diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 06c53e84268a..aaba0c2b8eaf 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -285,6 +285,9 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "lambdarank_truncation_level",
   "lambdarank_norm",
   "label_gain",
+  "lambdarank_unbiased",
+  "lambdarank_position_bins",
+  "lambdarank_eta",
   "metric",
"metric_freq", "is_provide_training_metric", @@ -588,6 +591,14 @@ void Config::GetMembersFromString(const std::unordered_map(tmp_str, ','); } + GetBool(params, "lambdarank_unbiased", &lambdarank_unbiased); + + GetInt(params, "lambdarank_position_bins", &lambdarank_position_bins); + CHECK_GT(lambdarank_position_bins, 0); + + GetDouble(params, "lambdarank_eta", &lambdarank_eta); + CHECK_GE(lambdarank_eta, 0.0); + GetInt(params, "metric_freq", &metric_freq); CHECK_GT(metric_freq, 0); @@ -721,6 +732,9 @@ std::string Config::SaveMembersToString() const { str_buf << "[lambdarank_truncation_level: " << lambdarank_truncation_level << "]\n"; str_buf << "[lambdarank_norm: " << lambdarank_norm << "]\n"; str_buf << "[label_gain: " << Common::Join(label_gain, ",") << "]\n"; + str_buf << "[lambdarank_unbiased: " << lambdarank_unbiased << "]\n"; + str_buf << "[lambdarank_position_bins: " << lambdarank_position_bins << "]\n"; + str_buf << "[lambdarank_eta: " << lambdarank_eta << "]\n"; str_buf << "[eval_at: " << Common::Join(eval_at, ",") << "]\n"; str_buf << "[multi_error_top_k: " << multi_error_top_k << "]\n"; str_buf << "[auc_mu_weights: " << Common::Join(auc_mu_weights, ",") << "]\n"; From 3cbcb62b3748e6f587d96728cfe4798ae4438334 Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 09:42:11 -0500 Subject: [PATCH 06/32] chore: variable init --- src/objective/rank_objective.hpp | 142 ++++++++++++++++++------------- 1 file changed, 85 insertions(+), 57 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 85a721c13a37..366a35139817 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -102,8 +103,9 @@ class LambdarankNDCG : public RankingObjective { sigmoid_(config.sigmoid), norm_(config.lambdarank_norm), truncation_level_(config.lambdarank_truncation_level), - _position_bins(config.position_bins), - _eta(config.eta) { + unbiased_(config.lambdarank_unbiased), + eta_(config.lambdarank_eta), + position_bins_(config.lambdarank_position_bins) { label_gain_ = config.label_gain; // initialize DCG calculator DCGCalculator::DefaultLabelGain(&label_gain_); @@ -113,6 +115,8 @@ class LambdarankNDCG : public RankingObjective { if (sigmoid_ <= 0.0) { Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_); } + + num_threads_ = omp_get_num_threads(); } explicit LambdarankNDCG(const std::vector& strs) @@ -138,6 +142,15 @@ class LambdarankNDCG : public RankingObjective { ConstructSigmoidTable(); } + void GetGradients(const double* score, score_t* gradients, + score_t* hessians) const override { + RankingObjective::GetGradients(score, gradients, hessians); + + if (unbiased_) { + UpdatePositionBiases(); + } + } + inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, const label_t* label, const double* score, score_t* lambdas, @@ -172,6 +185,8 @@ class LambdarankNDCG : public RankingObjective { if (score[sorted_idx[j]] == kMinScore) { continue; } // skip pairs with the same labels if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; } + + // diff data_size_t high_rank, low_rank; if (label[sorted_idx[i]] > label[sorted_idx[j]]) { high_rank = i; @@ -255,11 +270,11 @@ class LambdarankNDCG : public RankingObjective { } void InitPositionBiases() { /// - i_biases_.resize(_position_bins); - i_biases_pow_.resize(_position_bins); - j_biases_.resize(_position_bins); - j_biases_pow_.resize(_position_bins); - for (size_t i = 0; i < 
_position_bins; ++i) { + i_biases_.resize(position_bins_); + i_biases_pow_.resize(position_bins_); + j_biases_.resize(position_bins_); + j_biases_pow_.resize(position_bins_); + for (size_t i = 0; i < position_bins_; ++i) { i_biases_[i] = 1.0f; i_biases_pow_[i] = 1.0f; j_biases_[i] = 1.0f; @@ -268,12 +283,12 @@ class LambdarankNDCG : public RankingObjective { } void InitPositionGradients() { /// - position_cnts_.resize(_position_bins); - position_scores_.resize(_position_bins); - position_lambdas_.resize(_position_bins); - i_costs_.resize(_position_bins); - j_costs_.resize(_position_bins); - for (size_t i = 0; i < _position_bins; ++i) { + position_cnts_.resize(position_bins_); + position_scores_.resize(position_bins_); + position_lambdas_.resize(position_bins_); + i_costs_.resize(position_bins_); + j_costs_.resize(position_bins_); + for (size_t i = 0; i < position_bins_; ++i) { position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; position_lambdas_[i] = 0.0f; @@ -282,18 +297,18 @@ class LambdarankNDCG : public RankingObjective { } for (int i = 0; i < num_threads_; i++) { - position_cnts_buffer_.emplace_back(_position_bins, 0LL); - position_scores_buffer_.emplace_back(_position_bins, 0.0f); - position_lambdas_buffer_.emplace_back(_position_bins, 0.0f); - i_costs_buffer_.emplace_back(_position_bins, 0.0f); - j_costs_buffer_.emplace_back(_position_bins, 0.0f); + position_cnts_buffer_.emplace_back(position_bins_, 0LL); + position_scores_buffer_.emplace_back(position_bins_, 0.0f); + position_lambdas_buffer_.emplace_back(position_bins_, 0.0f); + i_costs_buffer_.emplace_back(position_bins_, 0.0f); + j_costs_buffer_.emplace_back(position_bins_, 0.0f); } } void UpdatePositionBiases() const { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { - for (size_t j = 0; j < _position_bins; ++j) { + for (size_t j = 0; j < position_bins_; ++j) { position_cnts_[j] += position_cnts_buffer_[i][j]; position_scores_[j] += position_scores_buffer_[i][j]; position_lambdas_[j] += position_lambdas_buffer_[i][j]; @@ -302,44 +317,17 @@ class LambdarankNDCG : public RankingObjective { } } - long long position_cnts_sum = 0LL; - for (size_t i = 0; i < _position_bins; ++i) { - position_cnts_sum += position_cnts_[i]; - } - std::cout << "" << std::endl; - std::cout << "eta: " << _eta << ", pair_cnt_sum: " << position_cnts_sum << std::endl; - std::cout << std::setw(10) << "position" - << std::setw(15) << "bias_i" - << std::setw(15) << "bias_j" - << std::setw(15) << "score" - << std::setw(15) << "lambda" - << std::setw(15) << "high_pair_cnt" - << std::setw(15) << "i_cost" - << std::setw(15) << "j_cost" - << std::endl; - for (size_t i = 0; i < _position_bins; ++i) { /// - std::cout << std::setw(10) << i - << std::setw(15) << i_biases_pow_[i] - << std::setw(15) << j_biases_pow_[i] - << std::setw(15) << position_scores_[i] / num_queries_ - << std::setw(15) << - position_lambdas_[i] / num_queries_ - << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum - << std::setw(15) << i_costs_[i] / position_cnts_sum - << std::setw(15) << j_costs_[i] / position_cnts_sum - << std::endl; - } + LogDebugPositionBiases(); - // Update bias - for (size_t i = 0; i < _position_bins; ++i) { /// + for (size_t i = 0; i < position_bins_; ++i) { + // Update bias i_biases_[i] = i_costs_[i] / i_costs_[0]; - i_biases_pow_[i] = pow(i_biases_[i], _eta); - } - for (size_t i = 0; i < _position_bins; ++i) { /// + i_biases_pow_[i] = pow(i_biases_[i], eta_); + j_biases_[i] = j_costs_[i] / j_costs_[0]; - j_biases_pow_[i] = 
pow(j_biases_[i], _eta); - } - // Clear Buffer - for (size_t i = 0; i < _position_bins; ++i) { /// + j_biases_pow_[i] = pow(j_biases_[i], eta_); + + // Clear position info position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; position_lambdas_[i] = 0.0f; @@ -347,8 +335,9 @@ class LambdarankNDCG : public RankingObjective { j_costs_[i] = 0.0f; } + // Clear Buffer for (int i = 0; i < num_threads_; i++) { - for (size_t j = 0; j < _position_bins; ++j) { + for (size_t j = 0; j < position_bins_; ++j) { position_cnts_buffer_[i][j] = 0LL; position_scores_buffer_[i][j] = 0.0f; position_lambdas_buffer_[i][j] = 0.0f; @@ -361,6 +350,40 @@ class LambdarankNDCG : public RankingObjective { const char* GetName() const override { return "lambdarank"; } private: + void LogDebugPositionBiases() const { + long long position_cnts_sum = 0LL; + for (size_t i = 0; i < position_bins_; ++i) { + position_cnts_sum += position_cnts_[i]; + } + + Log::Debug(""); + Log::Debug("eta: %.1f, position_cnts_sum: %i", eta_, position_cnts_sum); + + std::stringstream message_stream; + message_stream << std::setw(10) << "position" + << std::setw(15) << "bias_i" + << std::setw(15) << "bias_j" + << std::setw(15) << "score" + << std::setw(15) << "lambda" + << std::setw(15) << "high_pair_cnt" + << std::setw(15) << "i_cost" + << std::setw(15) << "j_cost"; + Log::Debug(message_stream.str().c_str()); + + for (size_t i = 0; i < position_bins_; ++i) { /// + message_stream << std::setw(10) << i + << std::setw(15) << i_biases_pow_[i] + << std::setw(15) << j_biases_pow_[i] + << std::setw(15) << position_scores_[i] / num_queries_ + << std::setw(15) << -position_lambdas_[i] / num_queries_ + << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum + << std::setw(15) << i_costs_[i] / position_cnts_sum + << std::setw(15) << j_costs_[i] / position_cnts_sum + << std::endl; + Log::Debug(message_stream.str().c_str()); + } + } + /*! \brief Simgoid param */ double sigmoid_; /*! \brief Normalize the lambdas or not */ @@ -407,10 +430,15 @@ class LambdarankNDCG : public RankingObjective { mutable std::vector j_costs_; mutable std::vector> j_costs_buffer_; + /*! \brief Should use unbiased lambdarank */ + bool unbiased_; /*! \brief Number of exponent */ - double _eta; + double eta_; /*! \brief Number of positions */ - size_t _position_bins; + size_t position_bins_; + + /*! \brief Number of threads */ + int num_threads_; }; /*! 
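[Editor's note for readers following the series: patches 04-06 above introduce the core of the Unbiased LambdaMART position-bias machinery -- per-thread cost accumulators that are merged in UpdatePositionBiases() and turned into bias factors via pow(i_costs_[i] / i_costs_[0], eta_). The following is a minimal standalone C++ sketch of just that update step; it is not code from the patches, and all names (kPositions, kEta, cost_buffer) and numbers are hypothetical, chosen only to illustrate the computation.]

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int kPositions = 4;  // analogous to position_bins_ in the patches
  const double kEta = 0.5;   // analogous to eta_
  // pretend two threads each accumulated per-position pairwise costs
  std::vector<std::vector<double>> cost_buffer = {
      {4.0, 2.0, 1.0, 0.5},
      {6.0, 3.0, 1.5, 0.5}};
  std::vector<double> costs(kPositions, 0.0);
  std::vector<double> biases_pow(kPositions, 1.0);
  // merge the thread-local buffers, as UpdatePositionBiases() does
  for (const auto& buf : cost_buffer) {
    for (int i = 0; i < kPositions; ++i) costs[i] += buf[i];
  }
  // normalize by the cost at position 0 and regularize with the eta exponent
  for (int i = 0; i < kPositions; ++i) {
    biases_pow[i] = std::pow(costs[i] / costs[0], kEta);
  }
  for (int i = 0; i < kPositions; ++i) {
    std::printf("position %d: cost=%5.2f bias^eta=%.4f\n", i, costs[i], biases_pow[i]);
  }
  return 0;
}

[Position 0 always gets a bias factor of 1.0 by construction; deeper positions get smaller factors, which the later patches use to divide the pairwise lambdas and costs so that clicks at top positions are not over-credited.]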
From a0680e66131d4713305b3527da5beec721c7ce4e Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 12:22:48 -0500 Subject: [PATCH 07/32] feat: add bias corrected lambda accumulators --- src/objective/rank_objective.hpp | 103 ++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 36 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 366a35139817..3350d350cccb 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -104,8 +104,7 @@ class LambdarankNDCG : public RankingObjective { norm_(config.lambdarank_norm), truncation_level_(config.lambdarank_truncation_level), unbiased_(config.lambdarank_unbiased), - eta_(config.lambdarank_eta), - position_bins_(config.lambdarank_position_bins) { + eta_(config.lambdarank_eta) { label_gain_ = config.label_gain; // initialize DCG calculator DCGCalculator::DefaultLabelGain(&label_gain_); @@ -155,6 +154,8 @@ class LambdarankNDCG : public RankingObjective { const label_t* label, const double* score, score_t* lambdas, score_t* hessians) const override { + const int tid = omp_get_thread_num(); // get thread ID + // get max DCG on current query const double inverse_max_dcg = inverse_max_dcgs_[query_id]; // initialize with zero @@ -215,23 +216,43 @@ class LambdarankNDCG : public RankingObjective { // get delta NDCG double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; // regular the delta_pair_NDCG by score distance - if (norm_ && best_score != worst_score) { + if ((norm_ || unbiased_) && best_score != worst_score) { delta_pair_NDCG /= (0.01f + fabs(delta_score)); } // calculate lambda for this pair double p_lambda = GetSigmoid(delta_score); - double p_hessian = p_lambda * (1.0f - p_lambda); + double p_hessian = p_lambda * (1.0f - p_lambda); // check that 1.0 instead of 2.0 is ok + + if (unbiased_) { + // check that 1.0 instead of 2.0 is ok + // might need a sigmoid_ thrown in somewhere + double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; /// log(1+e^(-sigma*(si-sj))) + + // orig has += high_sum_cost_i + // but that is just an in loop accumulator to avoid element look up + // that var that can be removed, lookup is fine + i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; + j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; + } + // update - p_lambda *= -sigmoid_ * delta_pair_NDCG; - p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG; + // orig doesn't have sigmoid_ + p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; + + // orig has 2.0 * delta / bias related to always defaulting sigmoid to 2 + // this has a sigmoid_^2, check impact + p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; ; + lambdas[low] -= static_cast(p_lambda); hessians[low] += static_cast(p_hessian); lambdas[high] += static_cast(p_lambda); hessians[high] += static_cast(p_hessian); + // lambda is negative, so use minus to accumulate sum_lambdas -= 2 * p_lambda; } } + if (norm_ && sum_lambdas > 0) { double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; for (data_size_t i = 0; i < cnt; ++i) { @@ -239,6 +260,14 @@ class LambdarankNDCG : public RankingObjective { hessians[i] = static_cast(hessians[i] * norm_factor); } } + + if (unbiased_) { + // calculate position score, and position lambda + for (data_size_t i = 0; i < cnt && i < truncation_level_; ++i) { /// + position_scores_buffer_[tid][i] += score[i]; + 
position_lambdas_buffer_[tid][i] += lambdas[i]; + } + } } inline double GetSigmoid(double score) const { @@ -269,12 +298,12 @@ class LambdarankNDCG : public RankingObjective { } } - void InitPositionBiases() { /// - i_biases_.resize(position_bins_); - i_biases_pow_.resize(position_bins_); - j_biases_.resize(position_bins_); - j_biases_pow_.resize(position_bins_); - for (size_t i = 0; i < position_bins_; ++i) { + void InitPositionBiases() { + i_biases_.resize(truncation_level_); + i_biases_pow_.resize(truncation_level_); + j_biases_.resize(truncation_level_); + j_biases_pow_.resize(truncation_level_); + for (int i = 0; i < truncation_level_; ++i) { i_biases_[i] = 1.0f; i_biases_pow_[i] = 1.0f; j_biases_[i] = 1.0f; @@ -282,13 +311,13 @@ class LambdarankNDCG : public RankingObjective { } } - void InitPositionGradients() { /// - position_cnts_.resize(position_bins_); - position_scores_.resize(position_bins_); - position_lambdas_.resize(position_bins_); - i_costs_.resize(position_bins_); - j_costs_.resize(position_bins_); - for (size_t i = 0; i < position_bins_; ++i) { + void InitPositionGradients() { + position_cnts_.resize(truncation_level_); + position_scores_.resize(truncation_level_); + position_lambdas_.resize(truncation_level_); + i_costs_.resize(truncation_level_); + j_costs_.resize(truncation_level_); + for (int i = 0; i < truncation_level_; ++i) { position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; position_lambdas_[i] = 0.0f; @@ -297,18 +326,18 @@ class LambdarankNDCG : public RankingObjective { } for (int i = 0; i < num_threads_; i++) { - position_cnts_buffer_.emplace_back(position_bins_, 0LL); - position_scores_buffer_.emplace_back(position_bins_, 0.0f); - position_lambdas_buffer_.emplace_back(position_bins_, 0.0f); - i_costs_buffer_.emplace_back(position_bins_, 0.0f); - j_costs_buffer_.emplace_back(position_bins_, 0.0f); + position_cnts_buffer_.emplace_back(truncation_level_, 0LL); + position_scores_buffer_.emplace_back(truncation_level_, 0.0f); + position_lambdas_buffer_.emplace_back(truncation_level_, 0.0f); + i_costs_buffer_.emplace_back(truncation_level_, 0.0f); + j_costs_buffer_.emplace_back(truncation_level_, 0.0f); } } void UpdatePositionBiases() const { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { - for (size_t j = 0; j < position_bins_; ++j) { + for (int j = 0; j < truncation_level_; ++j) { position_cnts_[j] += position_cnts_buffer_[i][j]; position_scores_[j] += position_scores_buffer_[i][j]; position_lambdas_[j] += position_lambdas_buffer_[i][j]; @@ -319,13 +348,17 @@ class LambdarankNDCG : public RankingObjective { LogDebugPositionBiases(); - for (size_t i = 0; i < position_bins_; ++i) { + for (int i = 0; i < truncation_level_; ++i) { // Update bias - i_biases_[i] = i_costs_[i] / i_costs_[0]; - i_biases_pow_[i] = pow(i_biases_[i], eta_); + if (i_costs_[0] > kMinScore) { + i_biases_[i] = i_costs_[i] / i_costs_[0]; + i_biases_pow_[i] = pow(i_biases_[i], eta_); + } - j_biases_[i] = j_costs_[i] / j_costs_[0]; - j_biases_pow_[i] = pow(j_biases_[i], eta_); + if (j_costs_[0] > kMinScore) { + j_biases_[i] = j_costs_[i] / j_costs_[0]; + j_biases_pow_[i] = pow(j_biases_[i], eta_); + } // Clear position info position_cnts_[i] = 0LL; @@ -337,14 +370,14 @@ class LambdarankNDCG : public RankingObjective { // Clear Buffer for (int i = 0; i < num_threads_; i++) { - for (size_t j = 0; j < position_bins_; ++j) { + for (int j = 0; j < truncation_level_; ++j) { position_cnts_buffer_[i][j] = 0LL; position_scores_buffer_[i][j] = 0.0f; 
position_lambdas_buffer_[i][j] = 0.0f; i_costs_buffer_[i][j] = 0.0f; j_costs_buffer_[i][j] = 0.0f; } - } + } } const char* GetName() const override { return "lambdarank"; } @@ -352,7 +385,7 @@ class LambdarankNDCG : public RankingObjective { private: void LogDebugPositionBiases() const { long long position_cnts_sum = 0LL; - for (size_t i = 0; i < position_bins_; ++i) { + for (int i = 0; i < truncation_level_; ++i) { position_cnts_sum += position_cnts_[i]; } @@ -370,7 +403,7 @@ class LambdarankNDCG : public RankingObjective { << std::setw(15) << "j_cost"; Log::Debug(message_stream.str().c_str()); - for (size_t i = 0; i < position_bins_; ++i) { /// + for (int i = 0; i < truncation_level_; ++i) { /// message_stream << std::setw(10) << i << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] @@ -434,8 +467,6 @@ class LambdarankNDCG : public RankingObjective { bool unbiased_; /*! \brief Number of exponent */ double eta_; - /*! \brief Number of positions */ - size_t position_bins_; /*! \brief Number of threads */ int num_threads_; From 8827540c17548b30cb3c460a5cb6d48911808534 Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 13:26:20 -0500 Subject: [PATCH 08/32] chore: remove intermediate vectors --- src/objective/rank_objective.hpp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 3350d350cccb..a256709d870b 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -187,7 +187,6 @@ class LambdarankNDCG : public RankingObjective { // skip pairs with the same labels if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; } - // diff data_size_t high_rank, low_rank; if (label[sorted_idx[i]] > label[sorted_idx[j]]) { high_rank = i; @@ -233,6 +232,8 @@ class LambdarankNDCG : public RankingObjective { // that var that can be removed, lookup is fine i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; + + position_cnts_buffer_[tid][high_rank] += 1LL; } // update @@ -299,14 +300,10 @@ class LambdarankNDCG : public RankingObjective { } void InitPositionBiases() { - i_biases_.resize(truncation_level_); i_biases_pow_.resize(truncation_level_); - j_biases_.resize(truncation_level_); j_biases_pow_.resize(truncation_level_); for (int i = 0; i < truncation_level_; ++i) { - i_biases_[i] = 1.0f; i_biases_pow_[i] = 1.0f; - j_biases_[i] = 1.0f; j_biases_pow_[i] = 1.0f; } } @@ -350,16 +347,11 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < truncation_level_; ++i) { // Update bias - if (i_costs_[0] > kMinScore) { - i_biases_[i] = i_costs_[i] / i_costs_[0]; - i_biases_pow_[i] = pow(i_biases_[i], eta_); - } - - if (j_costs_[0] > kMinScore) { - j_biases_[i] = j_costs_[i] / j_costs_[0]; - j_biases_pow_[i] = pow(j_biases_[i], eta_); - } + i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], eta_); + j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], eta_); + } + for (int i = 0; i < truncation_level_; ++i) { // Clear position info position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; @@ -439,12 +431,10 @@ class LambdarankNDCG : public RankingObjective { double sigmoid_table_idx_factor_; // bias correction variables - mutable std::vector i_biases_; - /*! \brief pow position biases */ + /*! \brief power of position biases */ mutable std::vector i_biases_pow_; - mutable std::vector j_biases_; - /*! \brief pow position biases */ + /*! 
\brief power of position biases */ mutable std::vector j_biases_pow_; /*! \brief position cnts */ From 6798c2ac6dfe5f9f3a33f82dca71a3724ae24cf0 Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 13:48:57 -0500 Subject: [PATCH 09/32] chore: address linter issues --- src/objective/rank_objective.hpp | 44 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index a256709d870b..fa5e2bfb8491 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -154,7 +154,7 @@ class LambdarankNDCG : public RankingObjective { const label_t* label, const double* score, score_t* lambdas, score_t* hessians) const override { - const int tid = omp_get_thread_num(); // get thread ID + const int tid = omp_get_thread_num(); // get thread id // get max DCG on current query const double inverse_max_dcg = inverse_max_dcgs_[query_id]; @@ -225,10 +225,10 @@ class LambdarankNDCG : public RankingObjective { if (unbiased_) { // check that 1.0 instead of 2.0 is ok // might need a sigmoid_ thrown in somewhere - double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; /// log(1+e^(-sigma*(si-sj))) + double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; // log(1+e^(-sigma*(si-sj))) // orig has += high_sum_cost_i - // but that is just an in loop accumulator to avoid element look up + // but that is just an in loop accumulator to avoid element look up // that var that can be removed, lookup is fine i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; @@ -242,7 +242,7 @@ class LambdarankNDCG : public RankingObjective { // orig has 2.0 * delta / bias related to always defaulting sigmoid to 2 // this has a sigmoid_^2, check impact - p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; ; + p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; lambdas[low] -= static_cast(p_lambda); hessians[low] += static_cast(p_hessian); @@ -264,7 +264,7 @@ class LambdarankNDCG : public RankingObjective { if (unbiased_) { // calculate position score, and position lambda - for (data_size_t i = 0; i < cnt && i < truncation_level_; ++i) { /// + for (data_size_t i = 0; i < cnt && i < truncation_level_; ++i) { position_scores_buffer_[tid][i] += score[i]; position_lambdas_buffer_[tid][i] += lambdas[i]; } @@ -299,7 +299,7 @@ class LambdarankNDCG : public RankingObjective { } } - void InitPositionBiases() { + void InitPositionBiases() { i_biases_pow_.resize(truncation_level_); j_biases_pow_.resize(truncation_level_); for (int i = 0; i < truncation_level_; ++i) { @@ -308,7 +308,7 @@ class LambdarankNDCG : public RankingObjective { } } - void InitPositionGradients() { + void InitPositionGradients() { position_cnts_.resize(truncation_level_); position_scores_.resize(truncation_level_); position_lambdas_.resize(truncation_level_); @@ -376,7 +376,7 @@ class LambdarankNDCG : public RankingObjective { private: void LogDebugPositionBiases() const { - long long position_cnts_sum = 0LL; + int64_t position_cnts_sum = 0LL; for (int i = 0; i < truncation_level_; ++i) { position_cnts_sum += position_cnts_[i]; } @@ -384,18 +384,18 @@ class LambdarankNDCG : public RankingObjective { Log::Debug(""); Log::Debug("eta: %.1f, position_cnts_sum: %i", eta_, position_cnts_sum); - std::stringstream message_stream; - 
message_stream << std::setw(10) << "position" + std::stringstream message_stream; + message_stream << std::setw(10) << "position" << std::setw(15) << "bias_i" << std::setw(15) << "bias_j" - << std::setw(15) << "score" - << std::setw(15) << "lambda" + << std::setw(15) << "score" + << std::setw(15) << "lambda" << std::setw(15) << "high_pair_cnt" << std::setw(15) << "i_cost" << std::setw(15) << "j_cost"; Log::Debug(message_stream.str().c_str()); - for (int i = 0; i < truncation_level_; ++i) { /// + for (int i = 0; i < truncation_level_; ++i) { message_stream << std::setw(10) << i << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] @@ -432,26 +432,26 @@ class LambdarankNDCG : public RankingObjective { // bias correction variables /*! \brief power of position biases */ - mutable std::vector i_biases_pow_; + mutable std::vector i_biases_pow_; /*! \brief power of position biases */ - mutable std::vector j_biases_pow_; + mutable std::vector j_biases_pow_; /*! \brief position cnts */ - mutable std::vector position_cnts_; - mutable std::vector> position_cnts_buffer_; + mutable std::vector position_cnts_; + mutable std::vector> position_cnts_buffer_; /*! \brief position scores */ mutable std::vector position_scores_; mutable std::vector> position_scores_buffer_; /*! \brief position lambdas */ mutable std::vector position_lambdas_; mutable std::vector> position_lambdas_buffer_; - // mutable double position cost; - mutable std::vector i_costs_; - mutable std::vector> i_costs_buffer_; + // mutable double position cost; + mutable std::vector i_costs_; + mutable std::vector> i_costs_buffer_; - mutable std::vector j_costs_; - mutable std::vector> j_costs_buffer_; + mutable std::vector j_costs_; + mutable std::vector> j_costs_buffer_; /*! \brief Should use unbiased lambdarank */ bool unbiased_; From e1c81585cfe3f3e804faf8314fdc1fd0110d1d02 Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 16:20:45 -0500 Subject: [PATCH 10/32] chore: remove unused position_lambdas variable --- src/objective/rank_objective.hpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index fa5e2bfb8491..f6f2f37e0a34 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -139,6 +139,10 @@ class LambdarankNDCG : public RankingObjective { } // construct sigmoid table to speed up sigmoid transform ConstructSigmoidTable(); + + // initialize position bias vectors + InitPositionBiases(); + InitPositionGradients(); } void GetGradients(const double* score, score_t* gradients, @@ -266,7 +270,6 @@ class LambdarankNDCG : public RankingObjective { // calculate position score, and position lambda for (data_size_t i = 0; i < cnt && i < truncation_level_; ++i) { position_scores_buffer_[tid][i] += score[i]; - position_lambdas_buffer_[tid][i] += lambdas[i]; } } } @@ -311,13 +314,11 @@ class LambdarankNDCG : public RankingObjective { void InitPositionGradients() { position_cnts_.resize(truncation_level_); position_scores_.resize(truncation_level_); - position_lambdas_.resize(truncation_level_); i_costs_.resize(truncation_level_); j_costs_.resize(truncation_level_); for (int i = 0; i < truncation_level_; ++i) { position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; - position_lambdas_[i] = 0.0f; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } @@ -325,7 +326,6 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < num_threads_; i++) { 
position_cnts_buffer_.emplace_back(truncation_level_, 0LL); position_scores_buffer_.emplace_back(truncation_level_, 0.0f); - position_lambdas_buffer_.emplace_back(truncation_level_, 0.0f); i_costs_buffer_.emplace_back(truncation_level_, 0.0f); j_costs_buffer_.emplace_back(truncation_level_, 0.0f); } @@ -337,7 +337,6 @@ class LambdarankNDCG : public RankingObjective { for (int j = 0; j < truncation_level_; ++j) { position_cnts_[j] += position_cnts_buffer_[i][j]; position_scores_[j] += position_scores_buffer_[i][j]; - position_lambdas_[j] += position_lambdas_buffer_[i][j]; i_costs_[j] += i_costs_buffer_[i][j]; j_costs_[j] += j_costs_buffer_[i][j]; } @@ -355,7 +354,6 @@ class LambdarankNDCG : public RankingObjective { // Clear position info position_cnts_[i] = 0LL; position_scores_[i] = 0.0f; - position_lambdas_[i] = 0.0f; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } @@ -365,7 +363,6 @@ class LambdarankNDCG : public RankingObjective { for (int j = 0; j < truncation_level_; ++j) { position_cnts_buffer_[i][j] = 0LL; position_scores_buffer_[i][j] = 0.0f; - position_lambdas_buffer_[i][j] = 0.0f; i_costs_buffer_[i][j] = 0.0f; j_costs_buffer_[i][j] = 0.0f; } @@ -400,7 +397,6 @@ class LambdarankNDCG : public RankingObjective { << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] << std::setw(15) << position_scores_[i] / num_queries_ - << std::setw(15) << -position_lambdas_[i] / num_queries_ << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum << std::setw(15) << i_costs_[i] / position_cnts_sum << std::setw(15) << j_costs_[i] / position_cnts_sum @@ -443,9 +439,7 @@ class LambdarankNDCG : public RankingObjective { /*! \brief position scores */ mutable std::vector position_scores_; mutable std::vector> position_scores_buffer_; - /*! 
\brief position lambdas */ - mutable std::vector position_lambdas_; - mutable std::vector> position_lambdas_buffer_; + // mutable double position cost; mutable std::vector i_costs_; mutable std::vector> i_costs_buffer_; From 26b316bd8967495753865566c95645ad4bc5220e Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 16:22:27 -0500 Subject: [PATCH 11/32] chore: remove unused position_scores variables --- src/objective/rank_objective.hpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index f6f2f37e0a34..78cb5dce39b7 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -265,13 +265,6 @@ class LambdarankNDCG : public RankingObjective { hessians[i] = static_cast(hessians[i] * norm_factor); } } - - if (unbiased_) { - // calculate position score, and position lambda - for (data_size_t i = 0; i < cnt && i < truncation_level_; ++i) { - position_scores_buffer_[tid][i] += score[i]; - } - } } inline double GetSigmoid(double score) const { @@ -313,19 +306,16 @@ class LambdarankNDCG : public RankingObjective { void InitPositionGradients() { position_cnts_.resize(truncation_level_); - position_scores_.resize(truncation_level_); i_costs_.resize(truncation_level_); j_costs_.resize(truncation_level_); for (int i = 0; i < truncation_level_; ++i) { position_cnts_[i] = 0LL; - position_scores_[i] = 0.0f; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } for (int i = 0; i < num_threads_; i++) { position_cnts_buffer_.emplace_back(truncation_level_, 0LL); - position_scores_buffer_.emplace_back(truncation_level_, 0.0f); i_costs_buffer_.emplace_back(truncation_level_, 0.0f); j_costs_buffer_.emplace_back(truncation_level_, 0.0f); } @@ -336,7 +326,6 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < num_threads_; i++) { for (int j = 0; j < truncation_level_; ++j) { position_cnts_[j] += position_cnts_buffer_[i][j]; - position_scores_[j] += position_scores_buffer_[i][j]; i_costs_[j] += i_costs_buffer_[i][j]; j_costs_[j] += j_costs_buffer_[i][j]; } @@ -353,7 +342,6 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < truncation_level_; ++i) { // Clear position info position_cnts_[i] = 0LL; - position_scores_[i] = 0.0f; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } @@ -362,7 +350,6 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < num_threads_; i++) { for (int j = 0; j < truncation_level_; ++j) { position_cnts_buffer_[i][j] = 0LL; - position_scores_buffer_[i][j] = 0.0f; i_costs_buffer_[i][j] = 0.0f; j_costs_buffer_[i][j] = 0.0f; } @@ -396,7 +383,6 @@ class LambdarankNDCG : public RankingObjective { message_stream << std::setw(10) << i << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] - << std::setw(15) << position_scores_[i] / num_queries_ << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum << std::setw(15) << i_costs_[i] / position_cnts_sum << std::setw(15) << j_costs_[i] / position_cnts_sum @@ -436,9 +422,6 @@ class LambdarankNDCG : public RankingObjective { /*! \brief position cnts */ mutable std::vector position_cnts_; mutable std::vector> position_cnts_buffer_; - /*! 
\brief position scores */ - mutable std::vector position_scores_; - mutable std::vector> position_scores_buffer_; // mutable double position cost; mutable std::vector i_costs_; From 4b8c2e9b0ff15efec6940ccd6823171393d845bf Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 9 Feb 2021 16:32:19 -0500 Subject: [PATCH 12/32] chore: remove position counts variables --- src/objective/rank_objective.hpp | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 78cb5dce39b7..5137e0e2161a 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -236,8 +236,6 @@ class LambdarankNDCG : public RankingObjective { // that var that can be removed, lookup is fine i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; - - position_cnts_buffer_[tid][high_rank] += 1LL; } // update @@ -305,17 +303,14 @@ class LambdarankNDCG : public RankingObjective { } void InitPositionGradients() { - position_cnts_.resize(truncation_level_); i_costs_.resize(truncation_level_); j_costs_.resize(truncation_level_); for (int i = 0; i < truncation_level_; ++i) { - position_cnts_[i] = 0LL; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } for (int i = 0; i < num_threads_; i++) { - position_cnts_buffer_.emplace_back(truncation_level_, 0LL); i_costs_buffer_.emplace_back(truncation_level_, 0.0f); j_costs_buffer_.emplace_back(truncation_level_, 0.0f); } @@ -325,7 +320,6 @@ class LambdarankNDCG : public RankingObjective { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { for (int j = 0; j < truncation_level_; ++j) { - position_cnts_[j] += position_cnts_buffer_[i][j]; i_costs_[j] += i_costs_buffer_[i][j]; j_costs_[j] += j_costs_buffer_[i][j]; } @@ -341,7 +335,6 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < truncation_level_; ++i) { // Clear position info - position_cnts_[i] = 0LL; i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } @@ -349,7 +342,6 @@ class LambdarankNDCG : public RankingObjective { // Clear Buffer for (int i = 0; i < num_threads_; i++) { for (int j = 0; j < truncation_level_; ++j) { - position_cnts_buffer_[i][j] = 0LL; i_costs_buffer_[i][j] = 0.0f; j_costs_buffer_[i][j] = 0.0f; } @@ -360,21 +352,10 @@ class LambdarankNDCG : public RankingObjective { private: void LogDebugPositionBiases() const { - int64_t position_cnts_sum = 0LL; - for (int i = 0; i < truncation_level_; ++i) { - position_cnts_sum += position_cnts_[i]; - } - - Log::Debug(""); - Log::Debug("eta: %.1f, position_cnts_sum: %i", eta_, position_cnts_sum); - std::stringstream message_stream; message_stream << std::setw(10) << "position" << std::setw(15) << "bias_i" << std::setw(15) << "bias_j" - << std::setw(15) << "score" - << std::setw(15) << "lambda" - << std::setw(15) << "high_pair_cnt" << std::setw(15) << "i_cost" << std::setw(15) << "j_cost"; Log::Debug(message_stream.str().c_str()); @@ -383,9 +364,8 @@ class LambdarankNDCG : public RankingObjective { message_stream << std::setw(10) << i << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] - << std::setw(15) << 1.0f * position_cnts_[i] / position_cnts_sum - << std::setw(15) << i_costs_[i] / position_cnts_sum - << std::setw(15) << j_costs_[i] / position_cnts_sum + << std::setw(15) << i_costs_[i] + << std::setw(15) << j_costs_[i] << std::endl; Log::Debug(message_stream.str().c_str()); } @@ -419,10 +399,6 @@ class LambdarankNDCG 
: public RankingObjective { /*! \brief power of position biases */ mutable std::vector j_biases_pow_; - /*! \brief position cnts */ - mutable std::vector position_cnts_; - mutable std::vector> position_cnts_buffer_; - // mutable double position cost; mutable std::vector i_costs_; mutable std::vector> i_costs_buffer_; From c50e92cb0bb731e037d5aa4f0cb639d81b214d72 Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 10 Feb 2021 09:51:06 -0500 Subject: [PATCH 13/32] chore: consolidate initialization and updates --- src/objective/rank_objective.hpp | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 5137e0e2161a..5d349e7c55ed 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -141,8 +141,7 @@ class LambdarankNDCG : public RankingObjective { ConstructSigmoidTable(); // initialize position bias vectors - InitPositionBiases(); - InitPositionGradients(); + InitPositionBiasesAndGradients(); } void GetGradients(const double* score, score_t* gradients, @@ -150,7 +149,7 @@ class LambdarankNDCG : public RankingObjective { RankingObjective::GetGradients(score, gradients, hessians); if (unbiased_) { - UpdatePositionBiases(); + UpdatePositionBiasesAndGradients(); } } @@ -293,35 +292,40 @@ class LambdarankNDCG : public RankingObjective { } } - void InitPositionBiases() { + void InitPositionBiasesAndGradients() { i_biases_pow_.resize(truncation_level_); j_biases_pow_.resize(truncation_level_); + i_costs_.resize(truncation_level_); + j_costs_.resize(truncation_level_); + for (int i = 0; i < truncation_level_; ++i) { + // init position biases i_biases_pow_[i] = 1.0f; j_biases_pow_[i] = 1.0f; - } - } - void InitPositionGradients() { - i_costs_.resize(truncation_level_); - j_costs_.resize(truncation_level_); - for (int i = 0; i < truncation_level_; ++i) { + // init position gradients i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } + // init gradient buffers for gathering results across threads for (int i = 0; i < num_threads_; i++) { i_costs_buffer_.emplace_back(truncation_level_, 0.0f); j_costs_buffer_.emplace_back(truncation_level_, 0.0f); } } - void UpdatePositionBiases() const { + void UpdatePositionBiasesAndGradients() const { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { for (int j = 0; j < truncation_level_; ++j) { + i_costs_[j] += i_costs_buffer_[i][j]; j_costs_[j] += j_costs_buffer_[i][j]; + + // clear buffer for next run + i_costs_buffer_[i][j] = 0.0f; + j_costs_buffer_[i][j] = 0.0f; } } @@ -338,14 +342,6 @@ class LambdarankNDCG : public RankingObjective { i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; } - - // Clear Buffer - for (int i = 0; i < num_threads_; i++) { - for (int j = 0; j < truncation_level_; ++j) { - i_costs_buffer_[i][j] = 0.0f; - j_costs_buffer_[i][j] = 0.0f; - } - } } const char* GetName() const override { return "lambdarank"; } From 592ade691ec065bc98281728a779fea17e213a3e Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 10 Feb 2021 11:11:32 -0500 Subject: [PATCH 14/32] chore: linter whitespace --- src/objective/rank_objective.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 5d349e7c55ed..43a82ec5a236 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -319,7 +319,6 @@ class LambdarankNDCG : public RankingObjective { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { for 
(int j = 0; j < truncation_level_; ++j) {
-
         i_costs_[j] += i_costs_buffer_[i][j];
         j_costs_[j] += j_costs_buffer_[i][j];

From c29a6f8493d5d8e5e3fca1ef4af4ee68ebefe74a Mon Sep 17 00:00:00 2001
From: robhowley
Date: Thu, 11 Feb 2021 13:32:11 -0500
Subject: [PATCH 15/32] chore: add comments on formulas and derivations

---
 src/objective/rank_objective.hpp | 146 +++++++++++++++++++++++++++----
 1 file changed, 128 insertions(+), 18 deletions(-)

diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp
index 43a82ec5a236..6084b744af0c 100644
--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -157,36 +157,65 @@ class LambdarankNDCG : public RankingObjective {
                                       const label_t* label, const double* score,
                                       score_t* lambdas,
                                       score_t* hessians) const override {
+    //
+    // query_id : the query for which we are computing gradients
+    // cnt      : number of documents returned for the query
+    // label    : the Y values (relevance labels) for each document
+    // score    : current predicted score for the associated document
+    // lambdas  : array updated in place, gradients for this query
+    // hessians : array updated in place, second derivs for this query
+    //

+    // queries are processed in parallel
+    // get the id of the current thread so we can safely accumulate bias corrections
     const int tid = omp_get_thread_num();  // get thread id

     // get max DCG on current query
     const double inverse_max_dcg = inverse_max_dcgs_[query_id];
+
     // initialize with zero
     for (data_size_t i = 0; i < cnt; ++i) {
      lambdas[i] = 0.0f;
      hessians[i] = 0.0f;
     }
+
+    // get sorted indices for scores
+    // by first filling the vector 0, 1, ... cnt-1
     std::vector<data_size_t> sorted_idx(cnt);
     for (data_size_t i = 0; i < cnt; ++i) {
      sorted_idx[i] = i;
     }
+
+    // and then sorting the result indices by score descending
+    // e.g. [3, 2, 4, 1] means document 3 currently has the highest score, document 1 the lowest
     std::stable_sort(
        sorted_idx.begin(), sorted_idx.end(),
        [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
+
     // get best and worst score
     const double best_score = score[sorted_idx[0]];
+
+    // the worst score should be the last item of sorted_idx
+    // if that item is the min score sentinel (-inf), take the one before it
     data_size_t worst_idx = cnt - 1;
     if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
      worst_idx -= 1;
     }
     const double worst_score = score[sorted_idx[worst_idx]];
+
+    // accumulator for lambdas used in normalization when norm_ = true
    double sum_lambdas = 0.0;

+    // start accumulating lambdas by pairs that contain at least one document above the truncation level
+    // working across the cnt documents for the query
+    // this goes in descending score order since we start with sorted_idx[0]
     for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
      if (score[sorted_idx[i]] == kMinScore) { continue; }
+
+      // compare doc i to all other docs j with a differing level of relevance
      for (data_size_t j = i + 1; j < cnt; ++j) {
        if (score[sorted_idx[j]] == kMinScore) { continue; }
+
        // skip pairs with the same labels
        if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }

-        // diff
        data_size_t high_rank, low_rank;
        if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
          high_rank = i;
          low_rank = j;
        } else {
          high_rank = j;
          low_rank = i;
        }
+
+        // info of more relevant doc
+        const data_size_t high = sorted_idx[high_rank];  // doc index in query results
+        const int high_label = static_cast<int>(label[high]);  // label (Y)
        const double high_score = score[high];
        const double high_label_gain = 
label_gain_[high_label];  // default: 2^high_label - 1
        const double high_discount = DCGCalculator::GetDiscount(high_rank);  // 1/log2(2 + i)
+
+        // info of less relevant doc
        const data_size_t low = sorted_idx[low_rank];
        const int low_label = static_cast<int>(label[low]);
        const double low_score = score[low];
        const double low_label_gain = label_gain_[low_label];
        const double low_discount = DCGCalculator::GetDiscount(low_rank);

+        //
+        // note on subsequent comments
+        // in the papers, we assume i is more relevant than j
+        // formula numbers are from the unbiased lambdamart paper
+        //
+        // si - sj
        const double delta_score = high_score - low_score;

        // get dcg gap
+        // default: 2^i - 2^j > 0
        const double dcg_gap = high_label_gain - low_label_gain;
+
        // get discount of this pair
+        // |1/log2(2 + i) - 1/log2(2 + j)|
        const double paired_discount = fabs(high_discount - low_discount);
+
        // get delta NDCG
+        // (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| / max_dcg
        double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
-        // regular the delta_pair_NDCG by score distance
+
+        // regularize the delta_pair_NDCG by score distance
        if ((norm_ || unbiased_) && best_score != worst_score) {
          delta_pair_NDCG /= (0.01f + fabs(delta_score));
        }
+
        // calculate lambda for this pair
+        // part of (34)
+        // (34) and (36) are used to get the unbiased gradient estimates
+        // in the original this first p_lambda is double what it should be, but it ends up not mattering
        double p_lambda = GetSigmoid(delta_score);  // 1 / (1 + e^(sigmoid_ * (si - sj)))

+        // d/dx {part of (34)} from above
+        // ** confirmed wrong in original **
+        // see subsequent p_hessian comments, but appears to be wrong
+        // if sigmoid_ was meant to be 2 in the paper, that would be multiplied out front
+        // it wouldn't be lambda * (2 - lambda) but instead 2 * lambda * (1 - lambda)
        double p_hessian = p_lambda * (1.0f - p_lambda);

        if (unbiased_) {
+          // formula (37)
+          // used to get t+ and t- from (30)/(31) respectively
+          // ** confirmed correct here and (accidentally) in original **
+          // orig has log(2/(2 - p_lambda))
+          // let bad_exp = e^(2 * sigmoid_ * (si - sj))
+          // let bad_denom = 1 + bad_exp
+          //
+          // 2/(2-(2/bad_denom))
+          // 2 / { (2*bad_denom - 2)/bad_denom }
+          // 2*bad_denom / (2 * (bad_denom - 1))
+          // bad_denom / bad_exp
+          // {1 + bad_exp} / bad_exp
+          // 1 + 1/bad_exp
+          // 1 + e^{-2*sigmoid_ * (si - sj)}
+          // ... so I think with the weird swaps/hard-coded 2s and sigmoid_ = 1, 2/(2-lambda) is right
+          // ... 
which means 1/(1-lambda) is correct here + double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; + + // formula (30) + // more relevant (clicked) gets debiased by less relevant (unclicked) i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; + + // formula (31) + // and vice versa j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; } // update + // ** confirmed p_lambda is correct ** + // rest of (34) with formula (36) for debiasing // orig doesn't have sigmoid_ + // if not unbiased_ + // {1/(1 + e^(sigmoid_ * (si - sj)))} * -sigmoid_ * (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| * (1/max_dcg) + // note that orig has + // {2/(1 + e^(2 * sigmoid_ * (si - sj)))} * -1 * (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| * (1/max_dcg) + // the 2 in the numerator and sigmoid_ missing from second term (delta_pair_NDCG) even out + // the 2 * sigmoid_ * (si - sj) in the exponent, however, makes no sense + // it appears the tests on that repo used an unset (default) sigmoid config value, which is 1 + // this means that the paper's sigmoid_table_ denominator was computed correctly for sigmoid_ = 2 + // as is described at (34) even though it was set for 1 + // also means that leaving it out from p_lambda was (accidentally) fine p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; - // orig has 2.0 * delta / bias related to always defaulting sigmoid to 2 - // this has a sigmoid_^2, check impact + // remainder of d/dx {(34) and (36) for debiasing} + // ** confirmed wrong ** + // if not unbiased + // let good_exp = e^(sigmoid_ * (si - sj)) + // let good_denom = 1 + good_exp + // + // p_lambda * (1.0f - p_lambda) * sigmoid_ * sigmoid_ * delta_pair_ndcg + // {1/good_denom} * (1 - 1/good_denom) * sigmoid_ * sigmoid_ * delta_pair_ndcg + // {1/good_denom} * ((good_denom - 1)/good_denom) * sigmoid_ * sigmoid_ * delta_pair_ndcg + // sigmoid_ * sigmoid_ * good_exp * delta_pair_ndcg / {good_denom^2} + // + // orig has + // let bad_exp = e^(2 * sigmoid_ * (si - sj)) + // let bad_denom = 1 + bad_exp + // + // p_lambda * (2 - p_lambda) * 2 * delta_pair_ndcg + // {2/bad_denom} * (2 - (2/bad_denom)) * 2 * delta_pair_ndcg + // {2/bad_denom} * (2*(bad_denom - 1)/bad_denom) * 2 * delta_pair_ndcg + // 2 * 2 * 2 * (bad_denom - 1) * delta_pair_ndcg / (bad_denom^2) + // 2 * 2 * 2 * bad_exp * delta_pair_ndcg / (bad_denom^2) + // + // if, as in the original ... + // * you WANT sigmoid_ = 2 + // * but actually leave it as 1 + // * and add 2s in as hardcoded constants + // then you end up w bad_denom == good_denom and + // 2 * 2 * 2 * good_exp * delta_pair_ndcg / (good_denom^2) + // 2 * sigmoid_ * sigmoid_ * good_exp * delta_pair_ndcg / {good_denom^2} + // and this has 1 too many 2s compared to what's here p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; lambdas[low] -= static_cast(p_lambda); @@ -388,10 +498,10 @@ class LambdarankNDCG : public RankingObjective { double sigmoid_table_idx_factor_; // bias correction variables - /*! \brief power of position biases */ + /*! \brief power of (click) position biases */ mutable std::vector i_biases_pow_; - /*! \brief power of position biases */ + /*! 
\brief power of (unclick) position biases */ mutable std::vector j_biases_pow_; // mutable double position cost; From 9796a728089e34df502350a3d210801c34e53ad3 Mon Sep 17 00:00:00 2001 From: robhowley Date: Thu, 11 Feb 2021 13:57:46 -0500 Subject: [PATCH 16/32] chore: eta has slightly clearer/diff meaning in this impl --- src/objective/rank_objective.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 6084b744af0c..b769baa29124 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -116,6 +116,7 @@ class LambdarankNDCG : public RankingObjective { } num_threads_ = omp_get_num_threads(); + position_bias_regularizer = 1.0f / (1.0f + eta_); } explicit LambdarankNDCG(const std::vector& strs) @@ -442,8 +443,8 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < truncation_level_; ++i) { // Update bias - i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], eta_); - j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], eta_); + i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], position_bias_regularizer); + j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], position_bias_regularizer); } for (int i = 0; i < truncation_level_; ++i) { @@ -516,6 +517,9 @@ class LambdarankNDCG : public RankingObjective { /*! \brief Number of exponent */ double eta_; + /*! \brief position bias regularize exponent, 1 / (1 + eta) */ + double position_bias_regularizer; + /*! \brief Number of threads */ int num_threads_; }; From a00ac5cbd20dc77bb7cb744d095e197be3828de1 Mon Sep 17 00:00:00 2001 From: robhowley Date: Thu, 18 Feb 2021 21:22:55 -0500 Subject: [PATCH 17/32] fix: debias rank values --- src/objective/rank_objective.hpp | 128 ++++++++++--------------------- 1 file changed, 42 insertions(+), 86 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index b769baa29124..e60bfda9fafa 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -115,7 +115,12 @@ class LambdarankNDCG : public RankingObjective { Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_); } - num_threads_ = omp_get_num_threads(); + #pragma omp parallel + #pragma omp master + { + num_threads_ = omp_get_num_threads(); + } + position_bias_regularizer = 1.0f / (1.0f + eta_); } @@ -149,9 +154,7 @@ class LambdarankNDCG : public RankingObjective { score_t* hessians) const override { RankingObjective::GetGradients(score, gradients, hessians); - if (unbiased_) { - UpdatePositionBiasesAndGradients(); - } + if (unbiased_) { UpdatePositionBiasesAndGradients(); } } inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, @@ -207,7 +210,7 @@ class LambdarankNDCG : public RankingObjective { // accumulator for lambdas used in normalization when norm_ = true double sum_lambdas = 0.0; - // start accmulate lambdas by pairs that contain at least one document above truncation level + // accmulate lambdas by pairs that contain at least one document above truncation level // working across the cnt number of documents for the query // this going in order of score desc since start w sorted_idx[0] for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) { @@ -220,6 +223,7 @@ class LambdarankNDCG : public RankingObjective { // skip pairs with the same labels if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; } + // determine more relevant document pair data_size_t high_rank, low_rank; if (label[sorted_idx[i]] > 
label[sorted_idx[j]]) { high_rank = i; @@ -232,7 +236,7 @@ class LambdarankNDCG : public RankingObjective { // info of more relevant doc const data_size_t high = sorted_idx[high_rank]; // doc index in query results const int high_label = static_cast(label[high]); // label (Y) - const double high_score = score[high]; + const double high_score = score[high]; // current model predicted score const double high_label_gain = label_gain_[high_label]; // default: 2^high_label - 1 const double high_discount = DCGCalculator::GetDiscount(high_rank); // 1/log2(2 + i) @@ -245,7 +249,7 @@ class LambdarankNDCG : public RankingObjective { // // note on subsequent comments - // in the papers, we assume i is more relevant than j + // in the papers, customary to assume i is more relevant than j // formula numbers are from unbiased lambdamart paper // // si - sj @@ -271,90 +275,33 @@ class LambdarankNDCG : public RankingObjective { // calculate lambda for this pair // part of (34) // (34) and (36) are used to get the unbiased gradient estimates - // in original this first p_lambda is double what it should be but ends up not mattering double p_lambda = GetSigmoid(delta_score); // 1 / (1 + e^(sigmoid_ * (si - sj))) // d/dx {part of (34)} from above - // ** confirmed wrong in original ** - // see subsequent p_hessian comments, but appears to be wrong - // if sigmoid_ was meant to be 2 in the paper, that would be multiplied out front - // it wouldn't be lambda * (2 - lambda) but instead 2 * lambda (1 - lambda) double p_hessian = p_lambda * (1.0f - p_lambda); + int debias_high_rank = static_cast(std::min(high, truncation_level_ - 1)); + int debias_low_rank = static_cast(std::min(low, truncation_level_ - 1)); + if (unbiased_) { // formula (37) // used to get t+ and t- from (30)/(31) respectively - // ** confirmed correct here and (accidentally) in original ** - // orig has log(2/(2 - p_lambda)) - // let bad_exp = e^(2 * sigmoid_ * (si - sj)) - // let bad_denom = 1 + bad_exp - // - // 2/(2-(2/bad_denom)) - // 2 / { (2*bad_denom - 2)/bad_denom } - // 2*bad_denom / (2 * (bad_denom - 1)) - // bad_denom / bad_exp - // {1 + bad_exp} / bad_exp - // 1 + 1/bad_exp - // 1 + e^{-2*sigmoid_ * (si - sj)} - // ... so i think w the weird swaps/hard coded 2s and sigmoid_ = 1 2/(2-lambda) is right - // ... 
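The debias_high_rank / debias_low_rank indices introduced above exist because the bias arrays only track the first truncation_level_ positions, so deeper ranks must be clamped into the last bucket. A standalone sketch of that lookup follows (names and numbers are made up; note also that the clamp at this point in the series takes the document ids high/low, which PATCH 31/32 later changes to the ranks high_rank/low_rank):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Sketch of the clamped bias lookup: factors exist only for the first
    // truncation_level positions, so anything ranked deeper shares the last
    // bucket. All values are illustrative, not LightGBM internals.
    int main() {
      const int truncation_level = 30;  // lambdarank_truncation_level
      std::vector<double> i_biases_pow(truncation_level, 1.0);  // start unbiased
      for (int rank : {0, 5, 29, 60}) {
        const int debias_rank = std::min(rank, truncation_level - 1);
        std::printf("rank %2d -> bucket %2d -> factor %.3f\n",
                    rank, debias_rank, i_biases_pow[debias_rank]);
      }
      return 0;
    }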
which means 1/(1-lambda) is correct here double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; // formula (30) // more relevant (clicked) gets debiased by less relevant (unclicked) - i_costs_buffer_[tid][high_rank] += p_cost / j_biases_pow_[low_rank]; + i_costs_buffer_[tid][debias_high_rank] += p_cost / j_biases_pow_[debias_low_rank]; - // formula (31) - // and vice versa - j_costs_buffer_[tid][low_rank] += p_cost / i_biases_pow_[high_rank]; + // // formula (31) + // // and vice versa + j_costs_buffer_[tid][debias_low_rank] += p_cost / i_biases_pow_[debias_high_rank]; } - // update - // ** confirmed p_lambda is correct ** - // rest of (34) with formula (36) for debiasing - // orig doesn't have sigmoid_ - // if not unbiased_ - // {1/(1 + e^(sigmoid_ * (si - sj)))} * -sigmoid_ * (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| * (1/max_dcg) - // note that orig has - // {2/(1 + e^(2 * sigmoid_ * (si - sj)))} * -1 * (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| * (1/max_dcg) - // the 2 in the numerator and sigmoid_ missing from second term (delta_pair_NDCG) even out - // the 2 * sigmoid_ * (si - sj) in the exponent, however, makes no sense - // it appears the tests on that repo used an unset (default) sigmoid config value, which is 1 - // this means that the paper's sigmoid_table_ denominator was computed correctly for sigmoid_ = 2 - // as is described at (34) even though it was set for 1 - // also means that leaving it out from p_lambda was (accidentally) fine - p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; + // update {(34) and (36) for debiasing} + p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; // remainder of d/dx {(34) and (36) for debiasing} - // ** confirmed wrong ** - // if not unbiased - // let good_exp = e^(sigmoid_ * (si - sj)) - // let good_denom = 1 + good_exp - // - // p_lambda * (1.0f - p_lambda) * sigmoid_ * sigmoid_ * delta_pair_ndcg - // {1/good_denom} * (1 - 1/good_denom) * sigmoid_ * sigmoid_ * delta_pair_ndcg - // {1/good_denom} * ((good_denom - 1)/good_denom) * sigmoid_ * sigmoid_ * delta_pair_ndcg - // sigmoid_ * sigmoid_ * good_exp * delta_pair_ndcg / {good_denom^2} - // - // orig has - // let bad_exp = e^(2 * sigmoid_ * (si - sj)) - // let bad_denom = 1 + bad_exp - // - // p_lambda * (2 - p_lambda) * 2 * delta_pair_ndcg - // {2/bad_denom} * (2 - (2/bad_denom)) * 2 * delta_pair_ndcg - // {2/bad_denom} * (2*(bad_denom - 1)/bad_denom) * 2 * delta_pair_ndcg - // 2 * 2 * 2 * (bad_denom - 1) * delta_pair_ndcg / (bad_denom^2) - // 2 * 2 * 2 * bad_exp * delta_pair_ndcg / (bad_denom^2) - // - // if, as in the original ... 
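The surviving p_cost line together with the two buffer updates above is the whole per-pair contribution to the position-bias estimates: formula (37) for the cost, and (30)/(31) for crediting it to the clicked and unclicked positions, each discounted by the other side's current bias. A minimal sketch of one such accumulation step, with stand-in values rather than the per-thread class members:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Sketch of formulas (30), (31), (37): one pair's cost is credited to both
    // positions, each divided by the *opposing* side's bias estimate. Buffers
    // and inputs here are illustrative stand-ins.
    int main() {
      const int levels = 5;
      std::vector<double> i_biases_pow(levels, 1.0);  // t+ estimates (click side)
      std::vector<double> j_biases_pow(levels, 1.0);  // t- estimates (unclick side)
      std::vector<double> i_costs(levels, 0.0);
      std::vector<double> j_costs(levels, 0.0);

      const int high_rank = 0, low_rank = 3;  // example pair positions
      const double p_lambda = 0.4;            // sigmoid of the score gap
      const double delta_pair_ndcg = 0.2;     // example |delta NDCG|

      const double p_cost = std::log(1.0 / (1.0 - p_lambda)) * delta_pair_ndcg;  // (37)
      i_costs[high_rank] += p_cost / j_biases_pow[low_rank];   // (30)
      j_costs[low_rank] += p_cost / i_biases_pow[high_rank];   // (31)

      std::printf("i_costs[%d] = %.6f, j_costs[%d] = %.6f\n",
                  high_rank, i_costs[high_rank], low_rank, j_costs[low_rank]);
      return 0;
    }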
- // * you WANT sigmoid_ = 2 - // * but actually leave it as 1 - // * and add 2s in as hardcoded constants - // then you end up w bad_denom == good_denom and - // 2 * 2 * 2 * good_exp * delta_pair_ndcg / (good_denom^2) - // 2 * sigmoid_ * sigmoid_ * good_exp * delta_pair_ndcg / {good_denom^2} - // and this has 1 too many 2s compared to what's here - p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[high_rank] / j_biases_pow_[low_rank]; + p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; lambdas[low] -= static_cast(p_lambda); hessians[low] += static_cast(p_hessian); @@ -365,7 +312,7 @@ class LambdarankNDCG : public RankingObjective { sum_lambdas -= 2 * p_lambda; } } - + if (norm_ && sum_lambdas > 0) { double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; for (data_size_t i = 0; i < cnt; ++i) { @@ -429,25 +376,29 @@ class LambdarankNDCG : public RankingObjective { void UpdatePositionBiasesAndGradients() const { // accumulate the parallel results for (int i = 0; i < num_threads_; i++) { - for (int j = 0; j < truncation_level_; ++j) { + for (int j = 0; j < truncation_level_; j++) { i_costs_[j] += i_costs_buffer_[i][j]; j_costs_[j] += j_costs_buffer_[i][j]; + } + } + for (int i = 0; i < num_threads_; i++) { + for (int j = 0; j < truncation_level_; j++) { // clear buffer for next run i_costs_buffer_[i][j] = 0.0f; j_costs_buffer_[i][j] = 0.0f; } } - LogDebugPositionBiases(); - - for (int i = 0; i < truncation_level_; ++i) { + for (int i = 0; i < truncation_level_; i++) { // Update bias i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], position_bias_regularizer); j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], position_bias_regularizer); } - for (int i = 0; i < truncation_level_; ++i) { + LogDebugPositionBiases(); + + for (int i = 0; i < truncation_level_; i++) { // Clear position info i_costs_[i] = 0.0f; j_costs_[i] = 0.0f; @@ -463,17 +414,19 @@ class LambdarankNDCG : public RankingObjective { << std::setw(15) << "bias_i" << std::setw(15) << "bias_j" << std::setw(15) << "i_cost" - << std::setw(15) << "j_cost"; + << std::setw(15) << "j_cost" + << std::endl; Log::Debug(message_stream.str().c_str()); + message_stream.str(""); for (int i = 0; i < truncation_level_; ++i) { message_stream << std::setw(10) << i << std::setw(15) << i_biases_pow_[i] << std::setw(15) << j_biases_pow_[i] << std::setw(15) << i_costs_[i] - << std::setw(15) << j_costs_[i] - << std::endl; + << std::setw(15) << j_costs_[i]; Log::Debug(message_stream.str().c_str()); + message_stream.str(""); } } @@ -512,12 +465,15 @@ class LambdarankNDCG : public RankingObjective { mutable std::vector j_costs_; mutable std::vector> j_costs_buffer_; - /*! \brief Should use unbiased lambdarank */ + /*! + * \brief Should use lambdarank with position bias correction + * [arxiv.org/pdf/1809.05818.pdf] + */ bool unbiased_; - /*! \brief Number of exponent */ + /*! \brief Position bias regularizer norm */ double eta_; - /*! \brief position bias regularize exponent, 1 / (1 + eta) */ + /*! \brief Position bias regularizer exponent, 1 / (1 + eta) */ double position_bias_regularizer; /*! 
\brief Number of threads */ From 672ec5bb6b950a59a4f1c5e4d43e8aaede658656 Mon Sep 17 00:00:00 2001 From: robhowley Date: Thu, 18 Feb 2021 21:26:51 -0500 Subject: [PATCH 18/32] chore: linter --- src/objective/rank_objective.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index e60bfda9fafa..4899ca503354 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -281,7 +281,7 @@ class LambdarankNDCG : public RankingObjective { double p_hessian = p_lambda * (1.0f - p_lambda); int debias_high_rank = static_cast(std::min(high, truncation_level_ - 1)); - int debias_low_rank = static_cast(std::min(low, truncation_level_ - 1)); + int debias_low_rank = static_cast(std::min(low, truncation_level_ - 1)); if (unbiased_) { // formula (37) @@ -312,7 +312,7 @@ class LambdarankNDCG : public RankingObjective { sum_lambdas -= 2 * p_lambda; } } - + if (norm_ && sum_lambdas > 0) { double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; for (data_size_t i = 0; i < cnt; ++i) { From 88e354206dc27f02ab00bfdc3e83703800718eda Mon Sep 17 00:00:00 2001 From: robhowley Date: Thu, 25 Feb 2021 09:31:24 -0500 Subject: [PATCH 19/32] chore: give better name to bias regularizer --- docs/Parameters.rst | 3 +-- include/LightGBM/config.h | 3 +-- src/io/config_auto.cpp | 8 ++++---- src/objective/rank_objective.hpp | 9 +++++---- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index a569ceffaed0..0aa7cc3c88a8 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -1033,11 +1033,10 @@ Objective Parameters - used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` -- ``lambdarank_eta`` :raw-html:`🔗︎`, default = ``0.5``, type = double, constraints: ``lambdarank_eta >= 0.0`` +- ``lambdarank_bias_p_norm`` :raw-html:`🔗︎`, default = ``0.5``, type = double, constraints: ``lambdarank_bias_p_norm >= 0.0`` - used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` - - position bias ratio regularizer exponent will be set to ``1 / (1 + eta)`` Metric Parameters ----------------- diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 4ee6a04869f7..fd74d948798b 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -889,8 +889,7 @@ struct Config { // check = >=0.0 // desc = used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` - // desc = position bias ratio regularizer exponent will be set to ``1 / (1 + eta)`` - double lambdarank_eta = 0.5; + double lambdarank_bias_p_norm = 0.5; #pragma endregion diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index aaba0c2b8eaf..4dc7f569086e 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -287,7 +287,7 @@ const std::unordered_set& Config::parameter_set() { "label_gain", "lambdarank_unbiased", "lambdarank_position_bins", - "lambdarank_eta", + "lambdarank_bias_p_norm", "metric", "metric_freq", "is_provide_training_metric", @@ -596,8 +596,8 @@ void Config::GetMembersFromString(const std::unordered_map& strs) @@ -470,10 +470,11 @@ class LambdarankNDCG : public RankingObjective { * [arxiv.org/pdf/1809.05818.pdf] */ bool unbiased_; + /*! \brief Position bias regularizer norm */ - double eta_; + double bias_p_norm_; - /*! \brief Position bias regularizer exponent, 1 / (1 + eta) */ + /*! \brief Position bias regularizer exponent, 1 / (1 + bias_p_norm_) */ double position_bias_regularizer; /*! 
\brief Number of threads */ From 99f4f04ec39933e7f50d8acc8e06dcabbcd07639 Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 3 Mar 2021 14:28:50 -0500 Subject: [PATCH 20/32] chore: tests w configs relevant to unbiased --- tests/python_package_test/test_sklearn.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 945985a9a46e..40952627989a 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -111,7 +111,7 @@ def test_multiclass(): assert gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret) -def test_lambdarank(): +def lambdarank_test_runner(lambdarank_unbiased=False, **kwargs): X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../examples/lambdarank/rank.train')) X_test, y_test = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -120,15 +120,27 @@ def test_lambdarank(): '../../examples/lambdarank/rank.train.query')) q_test = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../examples/lambdarank/rank.test.query')) - gbm = lgb.LGBMRanker(n_estimators=50) + gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=lambdarank_unbiased, **kwargs) gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) + return gbm + +def test_lambdarank(): + gbm = lambdarank_test_runner() + print(gbm.best_iteration_) assert gbm.best_iteration_ <= 24 assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674 assert gbm.best_score_['valid_0']['ndcg@3'] > 0.578 +def test_lambdarank_unbiased(): + gbm = lambdarank_test_runner(lambdarank_unbiased=True, sigmoid=2) + assert gbm.best_iteration_ <= 24 + assert gbm.best_score_['valid_0']['ndcg@1'] > 0.569 + assert gbm.best_score_['valid_0']['ndcg@3'] > 0.62 + + def test_xendcg(): dir_path = os.path.dirname(os.path.realpath(__file__)) X_train, y_train = load_svmlight_file(os.path.join(dir_path, '../../examples/xendcg/rank.train')) From 87219c2923e07fb2900957b92927318dbd15853c Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 3 Mar 2021 14:33:36 -0500 Subject: [PATCH 21/32] chore: remove unused param, replaced by truncation_level --- docs/Parameters.rst | 4 ---- include/LightGBM/config.h | 4 ---- src/io/config_auto.cpp | 5 ----- 3 files changed, 13 deletions(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 0aa7cc3c88a8..e207d3c45aa6 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -1029,10 +1029,6 @@ Objective Parameters - set this to ``true`` to use the position bias correction of `Unbiased LambdaMART `__ -- ``lambdarank_position_bins`` :raw-html:`🔗︎`, default = ``12``, type = int, constraints: ``lambdarank_position_bins > 0`` - - - used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` - - ``lambdarank_bias_p_norm`` :raw-html:`🔗︎`, default = ``0.5``, type = double, constraints: ``lambdarank_bias_p_norm >= 0.0`` - used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index fd74d948798b..7a3380981274 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -883,10 +883,6 @@ struct Config { // desc = set this to ``true`` to use the 
position bias correction of `Unbiased LambdaMART `__ bool lambdarank_unbiased = false; - // check = >0 - // desc = used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` - int lambdarank_position_bins = 12; - // check = >=0.0 // desc = used only in ``lambdarank`` application where ``lambdarank_unbiased = true`` double lambdarank_bias_p_norm = 0.5; diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 4dc7f569086e..b1605cde5ebc 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -286,7 +286,6 @@ const std::unordered_set& Config::parameter_set() { "lambdarank_norm", "label_gain", "lambdarank_unbiased", - "lambdarank_position_bins", "lambdarank_bias_p_norm", "metric", "metric_freq", @@ -593,9 +592,6 @@ void Config::GetMembersFromString(const std::unordered_map Date: Wed, 3 Mar 2021 14:38:39 -0500 Subject: [PATCH 22/32] fix: update workflow trigger to correct branch name --- .github/workflows/optional_checks.yml | 2 +- .github/workflows/python_package.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index a24657e9b641..cc8c9a3c4b91 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -4,7 +4,7 @@ on: pull_request: branches: - master - - position_debias + - position_unbiased jobs: all-successful: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index df7301af3daa..6f5a39879a3f 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -7,7 +7,7 @@ on: pull_request: branches: - master - - position_debias + - position_unbiased env: CONDA_ENV: test-env From da17901f04d25434038b8e393f94cc2de023c42e Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 18 Aug 2021 10:50:09 -0400 Subject: [PATCH 23/32] more merge conflicts --- src/objective/rank_objective.hpp | 4 ---- tests/python_package_test/test_sklearn.py | 16 ++-------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index b0258c567154..ae78f476b176 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -409,7 +409,6 @@ class LambdarankNDCG : public RankingObjective { const char* GetName() const override { return "lambdarank"; } private: -<<<<<<< HEAD void LogDebugPositionBiases() const { std::stringstream message_stream; message_stream << std::setw(10) << "position" @@ -432,10 +431,7 @@ class LambdarankNDCG : public RankingObjective { } } - /*! \brief Simgoid param */ -======= /*! \brief Sigmoid param */ ->>>>>>> master double sigmoid_; /*! 
\brief Normalize the lambdas or not */ bool norm_; diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 39c0591d71ee..c541dac0c783 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -112,31 +112,19 @@ def test_multiclass(): assert gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret) -<<<<<<< HEAD def lambdarank_test_runner(lambdarank_unbiased=False, **kwargs): - X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.train')) - X_test, y_test = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.test')) - q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.train.query')) - q_test = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.test.query')) - gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=lambdarank_unbiased, **kwargs) -======= -def test_lambdarank(): rank_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'lambdarank' X_train, y_train = load_svmlight_file(str(rank_example_dir / 'rank.train')) X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test')) q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query')) q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query')) - gbm = lgb.LGBMRanker(n_estimators=50) ->>>>>>> master + gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=lambdarank_unbiased, **kwargs) gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) return gbm + def test_lambdarank(): gbm = lambdarank_test_runner() print(gbm.best_iteration_) From a821813974ea70ea73bcd152b981c199ee3d6fcb Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 18 Aug 2021 10:52:43 -0400 Subject: [PATCH 24/32] remove git workflow customizations --- .github/workflows/optional_checks.yml | 1 - .github/workflows/python_package.yml | 1 - .github/workflows/static_analysis.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index 77abe7347edb..9b679986f6ec 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -4,7 +4,6 @@ on: pull_request: branches: - master - - position_unbiased jobs: all-successful: diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml index 6f5a39879a3f..22a9b58ef993 100644 --- a/.github/workflows/python_package.yml +++ b/.github/workflows/python_package.yml @@ -7,7 +7,6 @@ on: pull_request: branches: - master - - position_unbiased env: CONDA_ENV: test-env diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 7b5f825db0b9..cdc917903f46 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - master - - position_debias env: COMPILER: 'gcc' From 32aa90439e0fea0299d8d6fef1cc328d82a32e4c Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 18 Aug 2021 11:08:54 -0400 Subject: [PATCH 25/32] remove extra comments --- src/objective/rank_objective.hpp | 63 +++++--------------------------- 1 file 
changed, 9 insertions(+), 54 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index ae78f476b176..e7b8ee725d20 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -162,17 +162,7 @@ class LambdarankNDCG : public RankingObjective { const label_t* label, const double* score, score_t* lambdas, score_t* hessians) const override { - // - // query_id : the query for which we are computing gradients - // cnt : number of documents returned for the query - // label : the Y values (relevance labels) for each document - // score : current predicted score for the associated document - // lambdas : array updated in place, gradients for this query - // hessians : array updated in place, second derivs for this query - // - - // queries are processed in parallel - // get id for current thread so safely accumulate bias corrections + const int tid = omp_get_thread_num(); // get thread id // get max DCG on current query @@ -185,46 +175,32 @@ class LambdarankNDCG : public RankingObjective { } // get sorted indices for scores - // by first fill the vector 0, 1, ... cnt-1 std::vector sorted_idx(cnt); for (data_size_t i = 0; i < cnt; ++i) { sorted_idx[i] = i; } - - // and then sort the result indices by score descending - // eg [3, 2, 4, 1] means document 3 currently has highest score, document 1 lowest std::stable_sort( sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) { return score[a] > score[b]; }); // get best and worst score const double best_score = score[sorted_idx[0]]; - - // worst score should be last item of sorted_idx - // if that item is env min score (-inf), take the one before it data_size_t worst_idx = cnt - 1; if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) { worst_idx -= 1; } const double worst_score = score[sorted_idx[worst_idx]]; - - // accumulator for lambdas used in normalization when norm_ = true double sum_lambdas = 0.0; - // accmulate lambdas by pairs that contain at least one document above truncation level - // working across the cnt number of documents for the query - // this going in order of score desc since start w sorted_idx[0] + // accumulate lambdas by pairs that contain at least one document above truncation level for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) { if (score[sorted_idx[i]] == kMinScore) { continue; } - - // compare doc i to all other docs j of differing level of relevance for (data_size_t j = i + 1; j < cnt; ++j) { if (score[sorted_idx[j]] == kMinScore) { continue; } // skip pairs with the same labels if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; } - // determine more relevant document pair data_size_t high_rank, low_rank; if (label[sorted_idx[i]] > label[sorted_idx[j]]) { high_rank = i; @@ -235,11 +211,11 @@ class LambdarankNDCG : public RankingObjective { } // info of more relevant doc - const data_size_t high = sorted_idx[high_rank]; // doc index in query results - const int high_label = static_cast(label[high]); // label (Y) - const double high_score = score[high]; // current model predicted score - const double high_label_gain = label_gain_[high_label]; // default: 2^high_label - 1 - const double high_discount = DCGCalculator::GetDiscount(high_rank); // 1/log2(2 + i) + const data_size_t high = sorted_idx[high_rank]; + const int high_label = static_cast(label[high]); + const double high_score = score[high]; + const double high_label_gain = label_gain_[high_label]; + const double high_discount = 
DCGCalculator::GetDiscount(high_rank); // info of less relevant doc const data_size_t low = sorted_idx[low_rank]; @@ -248,24 +224,14 @@ class LambdarankNDCG : public RankingObjective { const double low_label_gain = label_gain_[low_label]; const double low_discount = DCGCalculator::GetDiscount(low_rank); - // - // note on subsequent comments - // in the papers, customary to assume i is more relevant than j - // formula numbers are from unbiased lambdamart paper - // - // si - sj const double delta_score = high_score - low_score; - // get dcg gap - // default: 2^i - 2^j > 0 const double dcg_gap = high_label_gain - low_label_gain; // get discount of this pair - // |1/log2(2 + i) - 1/log2(2 + j)| const double paired_discount = fabs(high_discount - low_discount); // get delta NDCG - // (2^i - 2^j) * |1/log2(2 + i) - 1/log2(2 + j)| / max_dcg double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; // regularize the delta_pair_NDCG by score distance @@ -274,31 +240,20 @@ class LambdarankNDCG : public RankingObjective { } // calculate lambda for this pair - // part of (34) - // (34) and (36) are used to get the unbiased gradient estimates - double p_lambda = GetSigmoid(delta_score); // 1 / (1 + e^(sigmoid_ * (si - sj))) - - // d/dx {part of (34)} from above + double p_lambda = GetSigmoid(delta_score); double p_hessian = p_lambda * (1.0f - p_lambda); int debias_high_rank = static_cast(std::min(high, truncation_level_ - 1)); int debias_low_rank = static_cast(std::min(low, truncation_level_ - 1)); if (unbiased_) { - // formula (37) - // used to get t+ and t- from (30)/(31) respectively double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; - // formula (30) // more relevant (clicked) gets debiased by less relevant (unclicked) i_costs_buffer_[tid][debias_high_rank] += p_cost / j_biases_pow_[debias_low_rank]; - - // // formula (31) - // // and vice versa - j_costs_buffer_[tid][debias_low_rank] += p_cost / i_biases_pow_[debias_high_rank]; + j_costs_buffer_[tid][debias_low_rank] += p_cost / i_biases_pow_[debias_high_rank]; // and vice versa } - // update {(34) and (36) for debiasing} p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; // remainder of d/dx {(34) and (36) for debiasing} From b450bfa3b76ca51fdaef25e8a93f21867fb25a32 Mon Sep 17 00:00:00 2001 From: robhowley Date: Wed, 18 Aug 2021 11:10:24 -0400 Subject: [PATCH 26/32] remove print statement --- tests/python_package_test/test_sklearn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index c541dac0c783..0c187a16dfc4 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -127,7 +127,6 @@ def lambdarank_test_runner(lambdarank_unbiased=False, **kwargs): def test_lambdarank(): gbm = lambdarank_test_runner() - print(gbm.best_iteration_) assert gbm.best_iteration_ <= 24 assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674 assert gbm.best_score_['valid_0']['ndcg@3'] > 0.578 From fc5b92d357539880dba49120a87d40e488d4a469 Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 7 Sep 2021 11:54:02 -0400 Subject: [PATCH 27/32] remove test refactor, line spacing, and comment typo fix --- src/objective/rank_objective.hpp | 20 ++------------------ tests/python_package_test/test_sklearn.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 
e7b8ee725d20..2e05d48469a8 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -167,13 +167,11 @@ class LambdarankNDCG : public RankingObjective { // get max DCG on current query const double inverse_max_dcg = inverse_max_dcgs_[query_id]; - // initialize with zero for (data_size_t i = 0; i < cnt; ++i) { lambdas[i] = 0.0f; hessians[i] = 0.0f; } - // get sorted indices for scores std::vector sorted_idx(cnt); for (data_size_t i = 0; i < cnt; ++i) { @@ -182,7 +180,6 @@ class LambdarankNDCG : public RankingObjective { std::stable_sort( sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) { return score[a] > score[b]; }); - // get best and worst score const double best_score = score[sorted_idx[0]]; data_size_t worst_idx = cnt - 1; @@ -197,10 +194,8 @@ class LambdarankNDCG : public RankingObjective { if (score[sorted_idx[i]] == kMinScore) { continue; } for (data_size_t j = i + 1; j < cnt; ++j) { if (score[sorted_idx[j]] == kMinScore) { continue; } - // skip pairs with the same labels if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; } - data_size_t high_rank, low_rank; if (label[sorted_idx[i]] > label[sorted_idx[j]]) { high_rank = i; @@ -209,15 +204,11 @@ class LambdarankNDCG : public RankingObjective { high_rank = j; low_rank = i; } - - // info of more relevant doc const data_size_t high = sorted_idx[high_rank]; const int high_label = static_cast(label[high]); const double high_score = score[high]; const double high_label_gain = label_gain_[high_label]; const double high_discount = DCGCalculator::GetDiscount(high_rank); - - // info of less relevant doc const data_size_t low = sorted_idx[low_rank]; const int low_label = static_cast(label[low]); const double low_score = score[low]; @@ -226,19 +217,16 @@ class LambdarankNDCG : public RankingObjective { const double delta_score = high_score - low_score; + // get dcg gap const double dcg_gap = high_label_gain - low_label_gain; - // get discount of this pair const double paired_discount = fabs(high_discount - low_discount); - // get delta NDCG double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; - - // regularize the delta_pair_NDCG by score distance + // regular the delta_pair_NDCG by score distance if ((norm_ || unbiased_) && best_score != worst_score) { delta_pair_NDCG /= (0.01f + fabs(delta_score)); } - // calculate lambda for this pair double p_lambda = GetSigmoid(delta_score); double p_hessian = p_lambda * (1.0f - p_lambda); @@ -255,20 +243,16 @@ class LambdarankNDCG : public RankingObjective { } p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; - - // remainder of d/dx {(34) and (36) for debiasing} p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; lambdas[low] -= static_cast(p_lambda); hessians[low] += static_cast(p_hessian); lambdas[high] += static_cast(p_lambda); hessians[high] += static_cast(p_hessian); - // lambda is negative, so use minus to accumulate sum_lambdas -= 2 * p_lambda; } } - if (norm_ && sum_lambdas > 0) { double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas; for (data_size_t i = 0; i < cnt; ++i) { diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 0c187a16dfc4..dc714f4bdf5a 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -112,28 +112,31 @@ def test_multiclass(): assert 
gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret) -def lambdarank_test_runner(lambdarank_unbiased=False, **kwargs): +def test_lambdarank(): rank_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'lambdarank' X_train, y_train = load_svmlight_file(str(rank_example_dir / 'rank.train')) X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test')) q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query')) q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query')) - gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=lambdarank_unbiased, **kwargs) + gbm = lgb.LGBMRanker(n_estimators=50) gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) - return gbm - - -def test_lambdarank(): - gbm = lambdarank_test_runner() assert gbm.best_iteration_ <= 24 assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674 assert gbm.best_score_['valid_0']['ndcg@3'] > 0.578 def test_lambdarank_unbiased(): - gbm = lambdarank_test_runner(lambdarank_unbiased=True, sigmoid=2) + rank_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'lambdarank' + X_train, y_train = load_svmlight_file(str(rank_example_dir / 'rank.train')) + X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test')) + q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query')) + q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query')) + gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=True, sigmoid=2) + gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], + eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, + callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) assert gbm.best_iteration_ <= 24 assert gbm.best_score_['valid_0']['ndcg@1'] > 0.569 assert gbm.best_score_['valid_0']['ndcg@3'] > 0.62 From ebb8f404ab5da5a1cf45024dcadf40777b4de08a Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 7 Sep 2021 12:02:57 -0400 Subject: [PATCH 28/32] remove gitignore changes, more whitespace removal --- .gitignore | 5 +---- src/objective/rank_objective.hpp | 6 ++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 7e8f4ce4c612..c281e91765c0 100644 --- a/.gitignore +++ b/.gitignore @@ -461,7 +461,4 @@ dask-worker-space/ *.pem *.pub *.rdp -*_rsa - -# swig jni -*_swig.jnilib \ No newline at end of file +*_rsa \ No newline at end of file diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 2e05d48469a8..950992259309 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -188,8 +188,7 @@ class LambdarankNDCG : public RankingObjective { } const double worst_score = score[sorted_idx[worst_idx]]; double sum_lambdas = 0.0; - - // accumulate lambdas by pairs that contain at least one document above truncation level + // start accmulate lambdas by pairs that contain at least one document above truncation level for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) { if (score[sorted_idx[i]] == kMinScore) { continue; } for (data_size_t j = i + 1; j < cnt; ++j) { @@ -241,10 +240,9 @@ class LambdarankNDCG : public RankingObjective { i_costs_buffer_[tid][debias_high_rank] += p_cost / j_biases_pow_[debias_low_rank]; j_costs_buffer_[tid][debias_low_rank] += p_cost / 
i_biases_pow_[debias_high_rank]; // and vice versa } - + // update p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; - lambdas[low] -= static_cast(p_lambda); hessians[low] += static_cast(p_hessian); lambdas[high] += static_cast(p_lambda); From 6ae3cfe7ff6d088395748eb716d2b58a2ca3539f Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 7 Sep 2021 12:06:06 -0400 Subject: [PATCH 29/32] end w new line --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c281e91765c0..5a90094850b9 100644 --- a/.gitignore +++ b/.gitignore @@ -461,4 +461,4 @@ dask-worker-space/ *.pem *.pub *.rdp -*_rsa \ No newline at end of file +*_rsa From 9572d7d0310b2408745705156de45681300a1c2d Mon Sep 17 00:00:00 2001 From: robhowley Date: Tue, 7 Sep 2021 12:41:15 -0400 Subject: [PATCH 30/32] fix redundant blank line found in cpp lint --- src/objective/rank_objective.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index 950992259309..b99e4b974578 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -162,7 +162,6 @@ class LambdarankNDCG : public RankingObjective { const label_t* label, const double* score, score_t* lambdas, score_t* hessians) const override { - const int tid = omp_get_thread_num(); // get thread id // get max DCG on current query From ef89b413282caa39b8fc7dd1dd90a5183bc18f57 Mon Sep 17 00:00:00 2001 From: Yu Shi Date: Fri, 29 Jul 2022 03:37:42 +0000 Subject: [PATCH 31/32] apply review comments from shiyu1994 in #4531 --- src/objective/rank_objective.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index b99e4b974578..eab95ce46e1d 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -121,7 +121,7 @@ class LambdarankNDCG : public RankingObjective { num_threads_ = omp_get_num_threads(); } - position_bias_regularizer = 1.0f / (1.0f + bias_p_norm_); + position_bias_regularizer_ = 1.0f / (1.0f + bias_p_norm_); } explicit LambdarankNDCG(const std::vector& strs) @@ -229,8 +229,8 @@ class LambdarankNDCG : public RankingObjective { double p_lambda = GetSigmoid(delta_score); double p_hessian = p_lambda * (1.0f - p_lambda); - int debias_high_rank = static_cast(std::min(high, truncation_level_ - 1)); - int debias_low_rank = static_cast(std::min(low, truncation_level_ - 1)); + int debias_high_rank = static_cast(std::min(high_rank, truncation_level_ - 1)); + int debias_low_rank = static_cast(std::min(low_rank, truncation_level_ - 1)); if (unbiased_) { double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; @@ -329,11 +329,13 @@ class LambdarankNDCG : public RankingObjective { for (int i = 0; i < truncation_level_; i++) { // Update bias - i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], position_bias_regularizer); - j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], position_bias_regularizer); + i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], position_bias_regularizer_); + j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], position_bias_regularizer_); } + #ifdef DEBUG LogDebugPositionBiases(); + #endif // DEBUG for (int i = 0; i < truncation_level_; i++) { // Clear position info @@ -412,7 +414,7 @@ class LambdarankNDCG : public 
RankingObjective {
   double bias_p_norm_;
 
   /*! \brief Position bias regularizer exponent, 1 / (1 + bias_p_norm_) */
-  double position_bias_regularizer;
+  double position_bias_regularizer_;
 
   /*! \brief Number of threads */
   int num_threads_;

From b0be720c8507111aaa9c39db63c134f53bbde87d Mon Sep 17 00:00:00 2001
From: Yu Shi
Date: Fri, 29 Jul 2022 04:05:24 +0000
Subject: [PATCH 32/32] use callback for early stopping

---
 tests/python_package_test/test_sklearn.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index e7d8c8de4a6a..08e6936fd423 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -166,9 +166,19 @@ def test_lambdarank_unbiased():
     q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query'))
     q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
     gbm = lgb.LGBMRanker(n_estimators=50, lambdarank_unbiased=True, sigmoid=2)
-    gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
-            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
-            callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
+    gbm.fit(
+        X_train,
+        y_train,
+        group=q_train,
+        eval_set=[(X_test, y_test)],
+        eval_group=[q_test],
+        eval_at=[1, 3],
+        verbose=False,
+        callbacks=[
+            lgb.early_stopping(10),
+            lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))
+        ]
+    )
     assert gbm.best_iteration_ <= 24
     assert gbm.best_score_['valid_0']['ndcg@1'] > 0.569
     assert gbm.best_score_['valid_0']['ndcg@3'] > 0.62
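Two details from the middle of the series are worth restating at the end. First, the num_threads_ fix from PATCH 17/32: omp_get_num_threads() reports the team size of the current parallel region, so calling it from serial code always yields 1, and the per-thread cost buffers must be sized from inside a parallel region, as the #pragma omp parallel / #pragma omp master pair does. A minimal reproduction (build with -fopenmp or the equivalent):

    #include <omp.h>
    #include <cstdio>

    // omp_get_num_threads() reports the size of the *current* team, which is 1
    // in any serial region -- hence the parallel/master wrapper in the patch.
    int main() {
      const int serial_count = omp_get_num_threads();  // always 1 here

      int parallel_count = 0;
      #pragma omp parallel
      #pragma omp master
      {
        parallel_count = omp_get_num_threads();  // real worker count
      }

      std::printf("serial: %d, parallel: %d\n", serial_count, parallel_count);
      return 0;
    }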
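Second, the bias update itself, as it stands after PATCH 17/32 and the PATCH 31/32 rename: once the per-thread buffers are summed, each position's bias factor is its cost ratio against position 0 raised to the regularizing exponent 1 / (1 + lambdarank_bias_p_norm) computed in the constructor, and the costs are cleared for the next boosting round. A standalone sketch with illustrative numbers (not the LightGBM members):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Sketch of the core of UpdatePositionBiasesAndGradients(): normalize the
    // aggregated per-position costs by position 0, raise to 1 / (1 + p_norm),
    // then reset the costs. Numbers are made up for illustration.
    int main() {
      const double bias_p_norm = 0.5;                        // lambdarank_bias_p_norm
      const double regularizer = 1.0 / (1.0 + bias_p_norm);  // as set in the ctor
      std::vector<double> i_costs = {2.0, 1.4, 0.9, 0.5};    // aggregated t+ costs
      std::vector<double> i_biases_pow(i_costs.size(), 1.0);

      for (std::size_t i = 0; i < i_costs.size(); ++i) {
        i_biases_pow[i] = std::pow(i_costs[i] / i_costs[0], regularizer);
        std::printf("position %zu: bias factor %.4f\n", i, i_biases_pow[i]);
      }
      std::fill(i_costs.begin(), i_costs.end(), 0.0);  // reset before next round
      return 0;
    }

From the Python side the whole mechanism is switched on with lambdarank_unbiased=True plus lambdarank_bias_p_norm, exactly as test_lambdarank_unbiased exercises it above.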