From 516bd37a28a0dfa7c2bc4a24587dbbcb8a697eb2 Mon Sep 17 00:00:00 2001
From: "Truman, Wentao TIAN"
Date: Tue, 5 Nov 2019 10:15:09 +0800
Subject: [PATCH] renamed variables from bias to offset (#2539)

---
 src/boosting/gbdt.cpp                  | 22 +++++-----
 src/boosting/rf.hpp                    | 10 ++---
 src/c_api.cpp                          | 16 +++----
 src/io/dense_bin.hpp                   |  8 ++--
 src/io/dense_nbits_bin.hpp             |  8 ++--
 src/io/parser.hpp                      | 12 +++---
 src/io/sparse_bin.hpp                  |  8 ++--
 src/objective/multiclass_objective.hpp |  4 +-
 src/treelearner/feature_histogram.hpp  | 42 +++++++++----------
 .../voting_parallel_tree_learner.cpp   |  4 +-
 10 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 4d79d482c2cb..2d469d4c28a3 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -310,9 +310,9 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
       leaf_pred[i] = tree_leaf_prediction[i][model_index];
       CHECK(leaf_pred[i] < models_[model_index]->num_leaves());
     }
-    size_t bias = static_cast<size_t>(tree_id) * num_data_;
-    auto grad = gradients_.data() + bias;
-    auto hess = hessians_.data() + bias;
+    size_t offset = static_cast<size_t>(tree_id) * num_data_;
+    auto grad = gradients_.data() + offset;
+    auto hess = hessians_.data() + offset;
     auto new_tree = tree_learner_->FitByExistingTree(models_[model_index].get(), leaf_pred, grad, hess);
     train_score_updater_->AddScore(tree_learner_.get(), new_tree, tree_id);
     models_[model_index].reset(new_tree);
@@ -381,26 +381,26 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   bool should_continue = false;
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-    const size_t bias = static_cast<size_t>(cur_tree_id) * num_data_;
+    const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
     std::unique_ptr<Tree> new_tree(new Tree(2));
     if (class_need_train_[cur_tree_id] && train_data_->num_features() > 0) {
-      auto grad = gradients + bias;
-      auto hess = hessians + bias;
+      auto grad = gradients + offset;
+      auto hess = hessians + offset;
       // need to copy gradients for bagging subset.
       if (is_use_subset_ && bag_data_cnt_ < num_data_) {
         for (int i = 0; i < bag_data_cnt_; ++i) {
-          gradients_[bias + i] = grad[bag_data_indices_[i]];
-          hessians_[bias + i] = hess[bag_data_indices_[i]];
+          gradients_[offset + i] = grad[bag_data_indices_[i]];
+          hessians_[offset + i] = hess[bag_data_indices_[i]];
         }
-        grad = gradients_.data() + bias;
-        hess = hessians_.data() + bias;
+        grad = gradients_.data() + offset;
+        hess = hessians_.data() + offset;
       }
       new_tree.reset(tree_learner_->Train(grad, hess, is_constant_hessian_, forced_splits_json_));
     }

     if (new_tree->num_leaves() > 1) {
       should_continue = true;
-      auto score_ptr = train_score_updater_->score() + bias;
+      auto score_ptr = train_score_updater_->score() + offset;
       auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast<double>(label[i]) - score_ptr[i]; };
       tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
                                      num_data_, bag_data_indices_.data(), bag_data_cnt_);
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index ba67473fdfb5..9f4369e33870 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -91,9 +91,9 @@ class RF : public GBDT {
       std::vector<double> tmp_scores(total_size, 0.0f);
       #pragma omp parallel for schedule(static)
       for (int j = 0; j < num_tree_per_iteration_; ++j) {
-        size_t bias = static_cast<size_t>(j)* num_data_;
+        size_t offset = static_cast<size_t>(j)* num_data_;
         for (data_size_t i = 0; i < num_data_; ++i) {
-          tmp_scores[bias + i] = init_scores_[j];
+          tmp_scores[offset + i] = init_scores_[j];
         }
       }
       objective_function_->
@@ -110,10 +110,10 @@ class RF : public GBDT {
     hessians = hessians_.data();
     for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
       std::unique_ptr<Tree> new_tree(new Tree(2));
-      size_t bias = static_cast<size_t>(cur_tree_id)* num_data_;
+      size_t offset = static_cast<size_t>(cur_tree_id)* num_data_;
       if (class_need_train_[cur_tree_id]) {
-        auto grad = gradients + bias;
-        auto hess = hessians + bias;
+        auto grad = gradients + offset;
+        auto hess = hessians + offset;
         // need to copy gradients for bagging subset.
         if (is_use_subset_ && bag_data_cnt_ < num_data_) {
diff --git a/src/c_api.cpp b/src/c_api.cpp
index 5b28be8346e5..2ec5045b369d 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -1764,8 +1764,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
     const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
     int64_t start = ptr_col_ptr[col_idx];
     int64_t end = ptr_col_ptr[col_idx + 1];
-    return [=] (int bias) {
-      int64_t i = static_cast<int64_t>(start + bias);
+    return [=] (int offset) {
+      int64_t i = static_cast<int64_t>(start + offset);
       if (i >= end) {
         return std::make_pair(-1, 0.0);
       }
@@ -1777,8 +1777,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
     const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
     int64_t start = ptr_col_ptr[col_idx];
     int64_t end = ptr_col_ptr[col_idx + 1];
-    return [=] (int bias) {
-      int64_t i = static_cast<int64_t>(start + bias);
+    return [=] (int offset) {
+      int64_t i = static_cast<int64_t>(start + offset);
       if (i >= end) {
         return std::make_pair(-1, 0.0);
       }
@@ -1793,8 +1793,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
     const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
     int64_t start = ptr_col_ptr[col_idx];
     int64_t end = ptr_col_ptr[col_idx + 1];
-    return [=] (int bias) {
-      int64_t i = static_cast<int64_t>(start + bias);
+    return [=] (int offset) {
+      int64_t i = static_cast<int64_t>(start + offset);
       if (i >= end) {
         return std::make_pair(-1, 0.0);
       }
@@ -1806,8 +1806,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
     const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
     int64_t start = ptr_col_ptr[col_idx];
     int64_t end = ptr_col_ptr[col_idx + 1];
-    return [=] (int bias) {
-      int64_t i = static_cast<int64_t>(start + bias);
+    return [=] (int offset) {
+      int64_t i = static_cast<int64_t>(start + offset);
       if (i >= end) {
         return std::make_pair(-1, 0.0);
       }
diff --git a/src/io/dense_bin.hpp b/src/io/dense_bin.hpp
index 92c015fb9d96..0553e0d2917f 100644
--- a/src/io/dense_bin.hpp
+++ b/src/io/dense_bin.hpp
@@ -24,9 +24,9 @@ class DenseBinIterator: public BinIterator {
     max_bin_(static_cast<VAL_T>(max_bin)),
     default_bin_(static_cast<VAL_T>(default_bin)) {
     if (default_bin_ == 0) {
-      bias_ = 1;
+      offset_ = 1;
     } else {
-      bias_ = 0;
+      offset_ = 0;
     }
   }
   inline uint32_t RawGet(data_size_t idx) override;
@@ -38,7 +38,7 @@ class DenseBinIterator: public BinIterator {
   VAL_T min_bin_;
   VAL_T max_bin_;
   VAL_T default_bin_;
-  uint8_t bias_;
+  uint8_t offset_;
 };
 /*!
 * \brief Used to store bins for dense feature
@@ -334,7 +334,7 @@ template <typename VAL_T>
 uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
   auto ret = bin_data_->data_[idx];
   if (ret >= min_bin_ && ret <= max_bin_) {
-    return ret - min_bin_ + bias_;
+    return ret - min_bin_ + offset_;
   } else {
     return default_bin_;
   }
diff --git a/src/io/dense_nbits_bin.hpp b/src/io/dense_nbits_bin.hpp
index 319dd3127fa4..7f1c3ed089cc 100644
--- a/src/io/dense_nbits_bin.hpp
+++ b/src/io/dense_nbits_bin.hpp
@@ -22,9 +22,9 @@ class Dense4bitsBinIterator : public BinIterator {
     max_bin_(static_cast<uint8_t>(max_bin)),
     default_bin_(static_cast<uint8_t>(default_bin)) {
     if (default_bin_ == 0) {
-      bias_ = 1;
+      offset_ = 1;
     } else {
-      bias_ = 0;
+      offset_ = 0;
     }
   }
   inline uint32_t RawGet(data_size_t idx) override;
@@ -36,7 +36,7 @@ class Dense4bitsBinIterator : public BinIterator {
   uint8_t min_bin_;
   uint8_t max_bin_;
   uint8_t default_bin_;
-  uint8_t bias_;
+  uint8_t offset_;
 };

 class Dense4bitsBin : public Bin {
@@ -383,7 +383,7 @@ class Dense4bitsBin : public Bin {
 uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
   const auto bin = (bin_data_->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
   if (bin >= min_bin_ && bin <= max_bin_) {
-    return bin - min_bin_ + bias_;
+    return bin - min_bin_ + offset_;
   } else {
     return default_bin_;
   }
diff --git a/src/io/parser.hpp b/src/io/parser.hpp
index 6bfe94a3f036..1cfde0635a57 100644
--- a/src/io/parser.hpp
+++ b/src/io/parser.hpp
@@ -24,15 +24,15 @@ class CSVParser: public Parser {
     std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
     int idx = 0;
     double val = 0.0f;
-    int bias = 0;
+    int offset = 0;
     *out_label = 0.0f;
     while (*str != '\0') {
       str = Common::Atof(str, &val);
       if (idx == label_idx_) {
         *out_label = val;
-        bias = -1;
+        offset = -1;
       } else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
-        out_features->emplace_back(idx + bias, val);
+        out_features->emplace_back(idx + offset, val);
       }
       ++idx;
       if (*str == ',') {
@@ -61,14 +61,14 @@ class TSVParser: public Parser {
     std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
     int idx = 0;
     double val = 0.0f;
-    int bias = 0;
+    int offset = 0;
     while (*str != '\0') {
       str = Common::Atof(str, &val);
       if (idx == label_idx_) {
         *out_label = val;
-        bias = -1;
+        offset = -1;
       } else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
-        out_features->emplace_back(idx + bias, val);
+        out_features->emplace_back(idx + offset, val);
       }
       ++idx;
       if (*str == '\t') {
diff --git a/src/io/sparse_bin.hpp b/src/io/sparse_bin.hpp
index 5c98917d310a..af2a1a722c2c 100644
--- a/src/io/sparse_bin.hpp
+++ b/src/io/sparse_bin.hpp
@@ -31,9 +31,9 @@ class SparseBinIterator: public BinIterator {
     max_bin_(static_cast<VAL_T>(max_bin)),
     default_bin_(static_cast<VAL_T>(default_bin)) {
     if (default_bin_ == 0) {
-      bias_ = 1;
+      offset_ = 1;
     } else {
-      bias_ = 0;
+      offset_ = 0;
     }
     Reset(0);
   }
@@ -48,7 +48,7 @@ class SparseBinIterator: public BinIterator {
   inline uint32_t Get(data_size_t idx) override {
     VAL_T ret = InnerRawGet(idx);
     if (ret >= min_bin_ && ret <= max_bin_) {
-      return ret - min_bin_ + bias_;
+      return ret - min_bin_ + offset_;
     } else {
       return default_bin_;
     }
@@ -63,7 +63,7 @@ class SparseBinIterator: public BinIterator {
   VAL_T min_bin_;
   VAL_T max_bin_;
   VAL_T default_bin_;
-  uint8_t bias_;
+  uint8_t offset_;
 };

 template <typename VAL_T>
diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp
index 60ec04078ff2..c133e1f75fdf 100644
--- a/src/objective/multiclass_objective.hpp
+++ b/src/objective/multiclass_objective.hpp
@@ -221,8 +221,8 @@ class MulticlassOVA: public ObjectiveFunction {
   void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
     for (int i = 0; i < num_class_; ++i) {
-      int64_t bias = static_cast<int64_t>(num_data_) * i;
-      binary_loss_[i]->GetGradients(score + bias, gradients + bias, hessians + bias);
+      int64_t offset = static_cast<int64_t>(num_data_) * i;
+      binary_loss_[i]->GetGradients(score + offset, gradients + offset, hessians + offset);
     }
   }
diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp
index 73e91c9f0dde..7a0f27ed4b09 100644
--- a/src/treelearner/feature_histogram.hpp
+++ b/src/treelearner/feature_histogram.hpp
@@ -23,7 +23,7 @@ class FeatureMetainfo {
  public:
   int num_bin;
   MissingType missing_type;
-  int8_t bias = 0;
+  int8_t offset = 0;
   uint32_t default_bin;
   int8_t monotone_type;
   double penalty;
@@ -73,7 +73,7 @@ class FeatureHistogram {
   * \param other The histogram that want to subtract
   */
   void Subtract(const FeatureHistogram& other) {
-    for (int i = 0; i < meta_->num_bin - meta_->bias; ++i) {
+    for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
       data_[i].cnt -= other.data_[i].cnt;
       data_[i].sum_gradients -= other.data_[i].sum_gradients;
       data_[i].sum_hessians -= other.data_[i].sum_hessians;
@@ -298,7 +298,7 @@ class FeatureHistogram {
     double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;

     // do stuff here
-    const int8_t bias = meta_->bias;
+    const int8_t offset = meta_->offset;

     double sum_right_gradient = 0.0f;
     double sum_right_hessian = kEpsilon;
@@ -313,15 +313,15 @@ class FeatureHistogram {
       use_na_as_missing = true;
     }

-    int t = meta_->num_bin - 1 - bias - use_na_as_missing;
-    const int t_end = 1 - bias;
+    int t = meta_->num_bin - 1 - offset - use_na_as_missing;
+    const int t_end = 1 - offset;

     // from right to left, and we don't need data in bin0
     for (; t >= t_end; --t) {
-      if (static_cast<uint32_t>(t + bias) < threshold) { break; }
+      if (static_cast<uint32_t>(t + offset) < threshold) { break; }

       // need to skip default bin
-      if (skip_default_bin && (t + bias) == static_cast<int>(meta_->default_bin)) { continue; }
+      if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
       sum_right_gradient += data_[t].sum_gradients;
       sum_right_hessian += data_[t].sum_hessians;
@@ -423,14 +423,14 @@ class FeatureHistogram {
   * \brief Binary size of this histogram
   */
   int SizeOfHistgram() const {
-    return (meta_->num_bin - meta_->bias) * sizeof(HistogramBinEntry);
+    return (meta_->num_bin - meta_->offset) * sizeof(HistogramBinEntry);
   }

   /*!
   * \brief Restore histogram from memory
   */
   void FromMemory(char* memory_data) {
-    std::memcpy(data_, memory_data, (meta_->num_bin - meta_->bias) * sizeof(HistogramBinEntry));
+    std::memcpy(data_, memory_data, (meta_->num_bin - meta_->offset) * sizeof(HistogramBinEntry));
   }

   /*!
@@ -507,7 +507,7 @@ class FeatureHistogram {
   void FindBestThresholdSequence(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
                                  double min_gain_shift, SplitInfo* output, int dir, bool skip_default_bin, bool use_na_as_missing) {
-    const int8_t bias = meta_->bias;
+    const int8_t offset = meta_->offset;

     double best_sum_left_gradient = NAN;
     double best_sum_left_hessian = NAN;
@@ -520,13 +520,13 @@ class FeatureHistogram {
       double sum_right_hessian = kEpsilon;
       data_size_t right_count = 0;

-      int t = meta_->num_bin - 1 - bias - use_na_as_missing;
-      const int t_end = 1 - bias;
+      int t = meta_->num_bin - 1 - offset - use_na_as_missing;
+      const int t_end = 1 - offset;

       // from right to left, and we don't need data in bin0
       for (; t >= t_end; --t) {
         // need to skip default bin
-        if (skip_default_bin && (t + bias) == static_cast<int>(meta_->default_bin)) { continue; }
+        if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
         sum_right_gradient += data_[t].sum_gradients;
         sum_right_hessian += data_[t].sum_hessians;
@@ -558,7 +558,7 @@ class FeatureHistogram {
           best_sum_left_gradient = sum_left_gradient;
           best_sum_left_hessian = sum_left_hessian;
           // left is <= threshold, right is > threshold. so this is t-1
-          best_threshold = static_cast<uint32_t>(t - 1 + bias);
+          best_threshold = static_cast<uint32_t>(t - 1 + offset);
           best_gain = current_gain;
         }
       }
@@ -568,13 +568,13 @@ class FeatureHistogram {
       data_size_t left_count = 0;

       int t = 0;
-      const int t_end = meta_->num_bin - 2 - bias;
+      const int t_end = meta_->num_bin - 2 - offset;

-      if (use_na_as_missing && bias == 1) {
+      if (use_na_as_missing && offset == 1) {
         sum_left_gradient = sum_gradient;
         sum_left_hessian = sum_hessian - kEpsilon;
         left_count = num_data;
-        for (int i = 0; i < meta_->num_bin - bias; ++i) {
+        for (int i = 0; i < meta_->num_bin - offset; ++i) {
           sum_left_gradient -= data_[i].sum_gradients;
           sum_left_hessian -= data_[i].sum_hessians;
           left_count -= data_[i].cnt;
@@ -584,7 +584,7 @@ class FeatureHistogram {

       for (; t <= t_end; ++t) {
         // need to skip default bin
-        if (skip_default_bin && (t + bias) == static_cast<int>(meta_->default_bin)) { continue; }
+        if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
         if (t >= 0) {
           sum_left_gradient += data_[t].sum_gradients;
           sum_left_hessian += data_[t].sum_hessians;
@@ -616,7 +616,7 @@ class FeatureHistogram {
           best_left_count = left_count;
           best_sum_left_gradient = sum_left_gradient;
           best_sum_left_hessian = sum_left_hessian;
-          best_threshold = static_cast<uint32_t>(t + bias);
+          best_threshold = static_cast<uint32_t>(t + offset);
           best_gain = current_gain;
         }
       }
@@ -711,9 +711,9 @@ class HistogramPool {
       feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
       feature_metas_[i].penalty = train_data->FeaturePenalte(i);
       if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
-        feature_metas_[i].bias = 1;
+        feature_metas_[i].offset = 1;
       } else {
-        feature_metas_[i].bias = 0;
+        feature_metas_[i].offset = 0;
       }
       feature_metas_[i].config = config;
       feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp
index cb18e3779ba6..b6e34dc106fe 100644
--- a/src/treelearner/voting_parallel_tree_learner.cpp
+++ b/src/treelearner/voting_parallel_tree_learner.cpp
@@ -75,9 +75,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
     feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
     feature_metas_[i].penalty = train_data->FeaturePenalte(i);
     if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
-      feature_metas_[i].bias = 1;
+      feature_metas_[i].offset = 1;
    } else {
-      feature_metas_[i].bias = 0;
+      feature_metas_[i].offset = 0;
    }
     feature_metas_[i].config = this->config_;
     feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
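
For readers skimming the patch: the member renamed to `offset_` is the shift applied when mapping a raw stored bin code into a histogram slot whenever bin 0 is the feature's default bin, as the Get() methods above show. The following self-contained C++ sketch mirrors that mapping for illustration only; ToyBinIterator and its main() driver are invented here and are not LightGBM code.

#include <cstdint>
#include <iostream>

// Illustrative sketch only: mimics the mapping done by the Get() methods
// touched in this patch (dense_bin.hpp / sparse_bin.hpp); not a real LightGBM class.
struct ToyBinIterator {
  uint32_t min_bin_;
  uint32_t max_bin_;
  uint32_t default_bin_;
  uint8_t offset_;  // 1 when the default bin is bin 0, otherwise 0

  ToyBinIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin)
      : min_bin_(min_bin), max_bin_(max_bin), default_bin_(default_bin),
        offset_(default_bin == 0 ? 1 : 0) {}

  // Values inside [min_bin_, max_bin_] are shifted to a group-local index;
  // anything else falls back to the default bin.
  uint32_t Get(uint32_t raw) const {
    if (raw >= min_bin_ && raw <= max_bin_) {
      return raw - min_bin_ + offset_;
    } else {
      return default_bin_;
    }
  }
};

int main() {
  ToyBinIterator it(3, 10, 0);      // default bin is 0, so offset_ == 1
  std::cout << it.Get(3) << "\n";   // 1  (3 - 3 + 1)
  std::cout << it.Get(10) << "\n";  // 8  (10 - 3 + 1)
  std::cout << it.Get(42) << "\n";  // 0  (out of range -> default bin)
  return 0;
}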