-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ranking algorithm: position unbiased option #4531
Changes from all commits
e5b6f31
ca85e05
7718343
7063000
4f4387a
482c143
4bae3ad
1449576
c49005e
3cbcb62
a0680e6
8827540
6798c2a
e1c8158
26b316b
4b8c2e9
c50e92c
592ade6
c29a6f8
9796a72
a00ac5c
672ec5b
d3347d2
88e3542
52f6243
99f4f04
87219c2
baa4a0d
20fe972
9ac06d5
da17901
a821813
32aa904
b450bfa
fc5b92d
ebb8f40
6ae3cfe
9572d7d
0cbe02d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
|
||
#include <LightGBM/metric.h> | ||
#include <LightGBM/objective_function.h> | ||
#include <LightGBM/utils/log.h> | ||
|
||
#include <algorithm> | ||
#include <cmath> | ||
|
@@ -101,7 +102,9 @@ class LambdarankNDCG : public RankingObjective { | |
: RankingObjective(config), | ||
sigmoid_(config.sigmoid), | ||
norm_(config.lambdarank_norm), | ||
truncation_level_(config.lambdarank_truncation_level) { | ||
truncation_level_(config.lambdarank_truncation_level), | ||
unbiased_(config.lambdarank_unbiased), | ||
bias_p_norm_(config.lambdarank_bias_p_norm) { | ||
label_gain_ = config.label_gain; | ||
// initialize DCG calculator | ||
DCGCalculator::DefaultLabelGain(&label_gain_); | ||
|
@@ -111,6 +114,14 @@ class LambdarankNDCG : public RankingObjective { | |
if (sigmoid_ <= 0.0) { | ||
Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_); | ||
} | ||
|
||
#pragma omp parallel | ||
#pragma omp master | ||
{ | ||
num_threads_ = omp_get_num_threads(); | ||
} | ||
|
||
position_bias_regularizer = 1.0f / (1.0f + bias_p_norm_); | ||
} | ||
|
||
explicit LambdarankNDCG(const std::vector<std::string>& strs) | ||
|
@@ -135,12 +146,24 @@ class LambdarankNDCG : public RankingObjective { | |
} | ||
// construct Sigmoid table to speed up Sigmoid transform | ||
ConstructSigmoidTable(); | ||
|
||
// initialize position bias vectors | ||
InitPositionBiasesAndGradients(); | ||
} | ||
|
||
void GetGradients(const double* score, score_t* gradients, | ||
score_t* hessians) const override { | ||
RankingObjective::GetGradients(score, gradients, hessians); | ||
|
||
if (unbiased_) { UpdatePositionBiasesAndGradients(); } | ||
} | ||
|
||
inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, | ||
const label_t* label, const double* score, | ||
score_t* lambdas, | ||
score_t* hessians) const override { | ||
const int tid = omp_get_thread_num(); // get thread id | ||
|
||
// get max DCG on current query | ||
const double inverse_max_dcg = inverse_max_dcgs_[query_id]; | ||
// initialize with zero | ||
|
@@ -199,15 +222,26 @@ class LambdarankNDCG : public RankingObjective { | |
// get delta NDCG | ||
double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; | ||
// regular the delta_pair_NDCG by score distance | ||
if (norm_ && best_score != worst_score) { | ||
if ((norm_ || unbiased_) && best_score != worst_score) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is unclear to me why with unbiased Lambdarank we must normalize the delta score here. |
||
delta_pair_NDCG /= (0.01f + fabs(delta_score)); | ||
} | ||
// calculate lambda for this pair | ||
double p_lambda = GetSigmoid(delta_score); | ||
double p_hessian = p_lambda * (1.0f - p_lambda); | ||
|
||
int debias_high_rank = static_cast<int>(std::min(high, truncation_level_ - 1)); | ||
int debias_low_rank = static_cast<int>(std::min(low, truncation_level_ - 1)); | ||
Comment on lines +232 to +233
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, I am trying to investigate what the issue is with the biases at the truncation-cutoff spot that I'm observing, and I'm wondering if the thresholding here is the culprit. Within the context of this implementation, the case where a rank is clamped to `truncation_level_ - 1` seems problematic. Correct me if I'm wrong, but the only case the above happens is when `high` or `low` is at or beyond `truncation_level_ - 1`. I was wondering if it's better to check the truncation level for both high and low ranks and only update the position biases when both are lower than `truncation_level_`. As I'm looking for the update to introduce the position column as a metadata value, what I'm thinking is along the lines of: const data_size_t high = sorted_idx[high_rank];
const data_size_t low = sorted_idx[low_rank];
data_size_t high_position, low_position;
if (unbiased_) {
// record_positions_ is a vector of length data_size, with the position of each
// record in the data. start is the offset for the current query
high_position = record_positions_[start + sorted_idx[high_rank]];
low_position = record_positions_[start + sorted_idx[low_rank]];
} else {
high_position = high;
low_position = low;
}
if (unbiased_) {
double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG;
// more relevant (clicked) gets debiased by less relevant (unclicked), only if within truncation levels
if (high_position <= truncation_level_ && low_position <= truncation_level_) {
i_costs_buffer_[tid][high_position] += p_cost / j_biases_pow_[low_position];
j_costs_buffer_[tid][low_position] += p_cost / i_biases_pow_[high_position]; // and vice versa
}
}
// By default we set values of 1.0 as no-ops
double i_bias_pow = 1.0;
double j_bias_pow = 1.0;
// We only use actual bias values if they are both within the truncation limits
if (unbiased_ && high_position <= truncation_level_ && low_position <= truncation_level_) {
i_bias_pow = i_biases_pow_[high_position];
j_bias_pow = j_biases_pow_[low_position];
}
// update, either with 1.0 values if at least one of data points ended up outside the truncation threshold or the actual biases
p_lambda *= -sigmoid_ * delta_pair_NDCG / i_bias_pow / j_bias_pow;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_bias_pow / j_bias_pow; Does this make sense? The main suggestion is to not "clamp" the bias positions between 0 and `truncation_level_ - 1`. It's a bit unclear to me still what the difference between the `i_` (clicked) and `j_` (unclicked) bias vectors is. What I want to ensure is that: since my positions come from a metadata column, bias updates only happen when both positions fall within the truncation level. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @thvasilo Thanks for reviewing this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe here [remainder of this comment was lost in extraction] |
||
|
||
if (unbiased_) { | ||
double p_cost = log(1.0f / (1.0f - p_lambda)) * delta_pair_NDCG; | ||
|
||
// more relevant (clicked) gets debiased by less relevant (unclicked) | ||
i_costs_buffer_[tid][debias_high_rank] += p_cost / j_biases_pow_[debias_low_rank]; | ||
j_costs_buffer_[tid][debias_low_rank] += p_cost / i_biases_pow_[debias_high_rank]; // and vice versa | ||
} | ||
// update | ||
p_lambda *= -sigmoid_ * delta_pair_NDCG; | ||
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG; | ||
p_lambda *= -sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suppose we are now before starting iteration |
||
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG / i_biases_pow_[debias_high_rank] / j_biases_pow_[debias_low_rank]; | ||
lambdas[low] -= static_cast<score_t>(p_lambda); | ||
hessians[low] += static_cast<score_t>(p_hessian); | ||
lambdas[high] += static_cast<score_t>(p_lambda); | ||
|
@@ -253,9 +287,86 @@ class LambdarankNDCG : public RankingObjective { | |
} | ||
} | ||
|
||
void InitPositionBiasesAndGradients() { | ||
i_biases_pow_.resize(truncation_level_); | ||
j_biases_pow_.resize(truncation_level_); | ||
i_costs_.resize(truncation_level_); | ||
j_costs_.resize(truncation_level_); | ||
|
||
for (int i = 0; i < truncation_level_; ++i) { | ||
// init position biases | ||
i_biases_pow_[i] = 1.0f; | ||
j_biases_pow_[i] = 1.0f; | ||
|
||
// init position gradients | ||
i_costs_[i] = 0.0f; | ||
j_costs_[i] = 0.0f; | ||
} | ||
|
||
// init gradient buffers for gathering results across threads | ||
for (int i = 0; i < num_threads_; i++) { | ||
i_costs_buffer_.emplace_back(truncation_level_, 0.0f); | ||
j_costs_buffer_.emplace_back(truncation_level_, 0.0f); | ||
} | ||
} | ||
|
||
void UpdatePositionBiasesAndGradients() const { | ||
// accumulate the parallel results | ||
for (int i = 0; i < num_threads_; i++) { | ||
for (int j = 0; j < truncation_level_; j++) { | ||
i_costs_[j] += i_costs_buffer_[i][j]; | ||
j_costs_[j] += j_costs_buffer_[i][j]; | ||
} | ||
} | ||
|
||
for (int i = 0; i < num_threads_; i++) { | ||
for (int j = 0; j < truncation_level_; j++) { | ||
// clear buffer for next run | ||
i_costs_buffer_[i][j] = 0.0f; | ||
j_costs_buffer_[i][j] = 0.0f; | ||
} | ||
} | ||
|
||
for (int i = 0; i < truncation_level_; i++) { | ||
// Update bias | ||
i_biases_pow_[i] = pow(i_costs_[i] / i_costs_[0], position_bias_regularizer); | ||
j_biases_pow_[i] = pow(j_costs_[i] / j_costs_[0], position_bias_regularizer); | ||
} | ||
|
||
LogDebugPositionBiases(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. while i found being able to track evolution and end point of position bias adjustment values useful in debug mode, happy to remove this since it doesn't have any new feature benefit during normal usage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Please remove this or wrap the debug code with a debug-build preprocessor guard (e.g. `#ifdef DEBUG`). |
||
|
||
for (int i = 0; i < truncation_level_; i++) { | ||
// Clear position info | ||
i_costs_[i] = 0.0f; | ||
j_costs_[i] = 0.0f; | ||
} | ||
} | ||
|
||
const char* GetName() const override { return "lambdarank"; } | ||
|
||
private: | ||
void LogDebugPositionBiases() const { | ||
std::stringstream message_stream; | ||
message_stream << std::setw(10) << "position" | ||
<< std::setw(15) << "bias_i" | ||
<< std::setw(15) << "bias_j" | ||
<< std::setw(15) << "i_cost" | ||
<< std::setw(15) << "j_cost" | ||
<< std::endl; | ||
Log::Debug(message_stream.str().c_str()); | ||
message_stream.str(""); | ||
|
||
for (int i = 0; i < truncation_level_; ++i) { | ||
message_stream << std::setw(10) << i | ||
<< std::setw(15) << i_biases_pow_[i] | ||
<< std::setw(15) << j_biases_pow_[i] | ||
<< std::setw(15) << i_costs_[i] | ||
<< std::setw(15) << j_costs_[i]; | ||
Log::Debug(message_stream.str().c_str()); | ||
message_stream.str(""); | ||
} | ||
} | ||
|
||
/*! \brief Sigmoid param */ | ||
double sigmoid_; | ||
/*! \brief Normalize the lambdas or not */ | ||
|
@@ -276,6 +387,35 @@ class LambdarankNDCG : public RankingObjective { | |
double max_sigmoid_input_ = 50; | ||
/*! \brief Factor that covert score to bin in Sigmoid table */ | ||
double sigmoid_table_idx_factor_; | ||
|
||
// bias correction variables | ||
/*! \brief power of (click) position biases */ | ||
mutable std::vector<label_t> i_biases_pow_; | ||
|
||
/*! \brief power of (unclick) position biases */ | ||
mutable std::vector<label_t> j_biases_pow_; | ||
|
||
// mutable double position cost; | ||
mutable std::vector<label_t> i_costs_; | ||
mutable std::vector<std::vector<label_t>> i_costs_buffer_; | ||
|
||
mutable std::vector<label_t> j_costs_; | ||
mutable std::vector<std::vector<label_t>> j_costs_buffer_; | ||
|
||
/*! | ||
* \brief Should use lambdarank with position bias correction | ||
* [arxiv.org/pdf/1809.05818.pdf] | ||
*/ | ||
bool unbiased_; | ||
|
||
/*! \brief Position bias regularizer norm */ | ||
double bias_p_norm_; | ||
|
||
/*! \brief Position bias regularizer exponent, 1 / (1 + bias_p_norm_) */ | ||
double position_bias_regularizer; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We'd better add an underscore after the member's name to be consistent with the whole code base, i.e. `position_bias_regularizer_`. |
||
|
||
/*! \brief Number of threads */ | ||
int num_threads_; | ||
}; | ||
|
||
/*! | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should this initialization be conditional on the value of `unbiased_`? Otherwise, the default behavior will be to initialize them and then not use them, right? (Apologies if I've misunderstood how this works.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
they get initialized to 1 and only updated if `unbiased_` is true, so any time you see them when `unbiased_` is false, you're just dividing by 1.