From 95be175046a582b1c787fc3f9b95329baebf2a45 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 10:58:47 +0100 Subject: [PATCH 01/42] No need to pass the tree to all fuctions related to monotone constraints because the pointer is shared. --- src/treelearner/monotone_constraints.hpp | 71 ++++++++++++------------ src/treelearner/serial_tree_learner.cpp | 4 +- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index dcad0d6d3288..a56c6e0f6bab 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -53,10 +53,10 @@ class LeafConstraintsBase { virtual ~LeafConstraintsBase() {} virtual const ConstraintEntry& Get(int leaf_idx) const = 0; virtual void Reset() = 0; - virtual void BeforeSplit(const Tree* tree, int leaf, int new_leaf, + virtual void BeforeSplit(int leaf, int new_leaf, int8_t monotone_type) = 0; virtual std::vector Update( - const Tree* tree, bool is_numerical_split, + bool is_numerical_split, int leaf, int new_leaf, int8_t monotone_type, double right_output, double left_output, int split_feature, const SplitInfo& split_info, const std::vector& best_split_per_leaf) = 0; @@ -78,7 +78,7 @@ class LeafConstraintsBase { tree_ = tree; } - private: + protected: const Tree* tree_; }; @@ -94,10 +94,9 @@ class BasicLeafConstraints : public LeafConstraintsBase { } } - void BeforeSplit(const Tree*, int, int, int8_t) override {} + void BeforeSplit(int, int, int8_t) override {} - std::vector Update(const Tree*, - bool is_numerical_split, int leaf, int new_leaf, + std::vector Update(bool is_numerical_split, int leaf, int new_leaf, int8_t monotone_type, double right_output, double left_output, int, const SplitInfo& , const std::vector&) override { @@ -138,7 +137,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { leaves_to_update_.clear(); } - void BeforeSplit(const Tree* tree, int leaf, int new_leaf, + void BeforeSplit(int leaf, int new_leaf, int8_t monotone_type) override { if (monotone_type != 0 || leaf_is_in_monotone_subtree_[leaf]) { leaf_is_in_monotone_subtree_[leaf] = true; @@ -148,7 +147,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { CHECK_GE(new_leaf - 1, 0); CHECK_LT(static_cast(new_leaf - 1), node_parent_.size()); #endif - node_parent_[new_leaf - 1] = tree->leaf_parent(leaf); + node_parent_[new_leaf - 1] = tree_->leaf_parent(leaf); } void UpdateConstraintsWithOutputs(bool is_numerical_split, int leaf, @@ -166,7 +165,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { } } - std::vector Update(const Tree* tree, bool is_numerical_split, int leaf, + std::vector Update(bool is_numerical_split, int leaf, int new_leaf, int8_t monotone_type, double right_output, double left_output, int split_feature, const SplitInfo& split_info, @@ -177,7 +176,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { monotone_type, right_output, left_output); // Initialize variables to store information while going up the tree - int depth = tree->leaf_depth(new_leaf) - 1; + int depth = tree_->leaf_depth(new_leaf) - 1; std::vector features_of_splits_going_up_from_original_leaf; std::vector thresholds_of_splits_going_up_from_original_leaf; @@ -187,7 +186,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { thresholds_of_splits_going_up_from_original_leaf.reserve(depth); was_original_leaf_right_child_of_split.reserve(depth); - GoUpToFindLeavesToUpdate(tree, tree->leaf_parent(new_leaf), + GoUpToFindLeavesToUpdate(tree_->leaf_parent(new_leaf), &features_of_splits_going_up_from_original_leaf, &thresholds_of_splits_going_up_from_original_leaf, &was_original_leaf_right_child_of_split, @@ -232,7 +231,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // Recursive function that goes up the tree, and then down to find leaves that // have constraints to be updated void GoUpToFindLeavesToUpdate( - const Tree* tree, int node_idx, + int node_idx, std::vector* features_of_splits_going_up_from_original_leaf, std::vector* thresholds_of_splits_going_up_from_original_leaf, std::vector* was_original_leaf_right_child_of_split, @@ -245,11 +244,11 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { int parent_idx = node_parent_[node_idx]; // if not at the root if (parent_idx != -1) { - int inner_feature = tree->split_feature_inner(parent_idx); - int feature = tree->split_feature(parent_idx); + int inner_feature = tree_->split_feature_inner(parent_idx); + int feature = tree_->split_feature(parent_idx); int8_t monotone_type = config_->monotone_constraints[feature]; - bool is_in_right_child = tree->right_child(parent_idx) == node_idx; - bool is_split_numerical = tree->IsNumericalSplit(node_idx); + bool is_in_right_child = tree_->right_child(parent_idx) == node_idx; + bool is_split_numerical = tree_->IsNumericalSplit(node_idx); // this is just an optimisation not to waste time going down in subtrees // where there won't be any leaf to update @@ -264,8 +263,8 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { if (monotone_type != 0) { // these variables correspond to the current split we encounter going // up the tree - int left_child_idx = tree->left_child(parent_idx); - int right_child_idx = tree->right_child(parent_idx); + int left_child_idx = tree_->left_child(parent_idx); + int right_child_idx = tree_->right_child(parent_idx); bool left_child_is_curr_idx = (left_child_idx == node_idx); int opposite_child_idx = (left_child_is_curr_idx) ? right_child_idx : left_child_idx; @@ -277,7 +276,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // so the code needs to go down in the the opposite child // to see which leaves' constraints need to be updated GoDownToFindLeavesToUpdate( - tree, opposite_child_idx, + opposite_child_idx, *features_of_splits_going_up_from_original_leaf, *thresholds_of_splits_going_up_from_original_leaf, *was_original_leaf_right_child_of_split, @@ -290,16 +289,16 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // is actually contiguous to the original 2 leaves and should be updated // so the variables associated with the split need to be recorded was_original_leaf_right_child_of_split->push_back( - tree->right_child(parent_idx) == node_idx); + tree_->right_child(parent_idx) == node_idx); thresholds_of_splits_going_up_from_original_leaf->push_back( - tree->threshold_in_bin(parent_idx)); + tree_->threshold_in_bin(parent_idx)); features_of_splits_going_up_from_original_leaf->push_back( - tree->split_feature_inner(parent_idx)); + tree_->split_feature_inner(parent_idx)); } // since current node is not the root, keep going up GoUpToFindLeavesToUpdate( - tree, parent_idx, features_of_splits_going_up_from_original_leaf, + parent_idx, features_of_splits_going_up_from_original_leaf, thresholds_of_splits_going_up_from_original_leaf, was_original_leaf_right_child_of_split, split_feature, split_info, split_threshold, best_split_per_leaf); @@ -307,7 +306,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { } void GoDownToFindLeavesToUpdate( - const Tree* tree, int node_idx, + int node_idx, const std::vector& features_of_splits_going_up_from_original_leaf, const std::vector& thresholds_of_splits_going_up_from_original_leaf, @@ -345,9 +344,9 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { #ifdef DEBUG if (update_max_constraints) { - CHECK_GE(min_max_constraints.first, tree->LeafOutput(leaf_idx)); + CHECK_GE(min_max_constraints.first, tree_->LeafOutput(leaf_idx)); } else { - CHECK_LE(min_max_constraints.second, tree->LeafOutput(leaf_idx)); + CHECK_LE(min_max_constraints.second, tree_->LeafOutput(leaf_idx)); } #endif // depending on which split made the current leaf and the original leaves contiguous, @@ -368,12 +367,12 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { } else { // if node // check if the children are contiguous with the original leaf std::pair keep_going_left_right = ShouldKeepGoingLeftRight( - tree, node_idx, features_of_splits_going_up_from_original_leaf, + node_idx, features_of_splits_going_up_from_original_leaf, thresholds_of_splits_going_up_from_original_leaf, was_original_leaf_right_child_of_split); - int inner_feature = tree->split_feature_inner(node_idx); - uint32_t threshold = tree->threshold_in_bin(node_idx); - bool is_split_numerical = tree->IsNumericalSplit(node_idx); + int inner_feature = tree_->split_feature_inner(node_idx); + uint32_t threshold = tree_->threshold_in_bin(node_idx); + bool is_split_numerical = tree_->IsNumericalSplit(node_idx); bool use_left_leaf_for_update_right = true; bool use_right_leaf_for_update_left = true; // if the split is on the same feature (categorical variables not supported) @@ -392,7 +391,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // go down left if (keep_going_left_right.first) { GoDownToFindLeavesToUpdate( - tree, tree->left_child(node_idx), + tree_->left_child(node_idx), features_of_splits_going_up_from_original_leaf, thresholds_of_splits_going_up_from_original_leaf, was_original_leaf_right_child_of_split, update_max_constraints, @@ -403,7 +402,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // go down right if (keep_going_left_right.second) { GoDownToFindLeavesToUpdate( - tree, tree->right_child(node_idx), + tree_->right_child(node_idx), features_of_splits_going_up_from_original_leaf, thresholds_of_splits_going_up_from_original_leaf, was_original_leaf_right_child_of_split, update_max_constraints, @@ -415,14 +414,14 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { } std::pair ShouldKeepGoingLeftRight( - const Tree* tree, int node_idx, + int node_idx, const std::vector& features_of_splits_going_up_from_original_leaf, const std::vector& thresholds_of_splits_going_up_from_original_leaf, const std::vector& was_original_leaf_right_child_of_split) { - int inner_feature = tree->split_feature_inner(node_idx); - uint32_t threshold = tree->threshold_in_bin(node_idx); - bool is_split_numerical = tree->IsNumericalSplit(node_idx); + int inner_feature = tree_->split_feature_inner(node_idx); + uint32_t threshold = tree_->threshold_in_bin(node_idx); + bool is_split_numerical = tree_->IsNumericalSplit(node_idx); bool keep_going_right = true; bool keep_going_left = true; diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 8b3699667ad6..42445494018f 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -561,7 +561,7 @@ void SerialTreeLearner::SplitInner(Tree* tree, int best_leaf, int* left_leaf, auto next_leaf_id = tree->NextLeafId(); // update before tree split - constraints_->BeforeSplit(tree, best_leaf, next_leaf_id, + constraints_->BeforeSplit(best_leaf, next_leaf_id, best_split_info.monotone_type); bool is_numerical_split = @@ -657,7 +657,7 @@ void SerialTreeLearner::SplitInner(Tree* tree, int best_leaf, int* left_leaf, best_split_info.left_output); } auto leaves_need_update = constraints_->Update( - tree, is_numerical_split, *left_leaf, *right_leaf, + is_numerical_split, *left_leaf, *right_leaf, best_split_info.monotone_type, best_split_info.right_output, best_split_info.left_output, inner_feature_index, best_split_info, best_split_per_leaf_); From bb6668c031dc3b1f594f16ee569e1a0dcca304f1 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 10:59:40 +0100 Subject: [PATCH 02/42] Fix OppositeChildShouldBeUpdated numerical split optimisation. --- src/treelearner/monotone_constraints.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index a56c6e0f6bab..47e6fd707fa0 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -202,7 +202,6 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { int inner_feature, const std::vector& was_original_leaf_right_child_of_split, bool is_in_right_child) { - bool opposite_child_should_be_updated = true; // if the split is categorical, it is not handled by this optimisation, // so the code will have to go down in the other child subtree to see if @@ -220,12 +219,14 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { inner_feature && (was_original_leaf_right_child_of_split[split_idx] == is_in_right_child)) { - opposite_child_should_be_updated = false; - break; + return false; } } + return true; + } + else { + return false; } - return opposite_child_should_be_updated; } // Recursive function that goes up the tree, and then down to find leaves that @@ -248,7 +249,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { int feature = tree_->split_feature(parent_idx); int8_t monotone_type = config_->monotone_constraints[feature]; bool is_in_right_child = tree_->right_child(parent_idx) == node_idx; - bool is_split_numerical = tree_->IsNumericalSplit(node_idx); + bool is_split_numerical = tree_->IsNumericalSplit(parent_idx); // this is just an optimisation not to waste time going down in subtrees // where there won't be any leaf to update From 75ca7086bb783361e8c67b717e35eaac90b49e9d Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 10:59:47 +0100 Subject: [PATCH 03/42] No need to use constraints when computing the output of the root. --- src/treelearner/serial_tree_learner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 42445494018f..00386eb52c88 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -715,9 +715,9 @@ void SerialTreeLearner::ComputeBestSplitForFeature( double parent_output; if (leaf_splits->leaf_index() == 0) { // for root leaf the "parent" output is its own output because we don't apply any smoothing to the root - parent_output = FeatureHistogram::CalculateSplittedLeafOutput( + parent_output = FeatureHistogram::CalculateSplittedLeafOutput( leaf_splits->sum_gradients(), leaf_splits->sum_hessians(), config_->lambda_l1, - config_->lambda_l2, config_->max_delta_step, constraints_->Get(leaf_splits->leaf_index()), + config_->lambda_l2, config_->max_delta_step, ConstraintEntry(), config_->path_smooth, static_cast(num_data), 0); } else { parent_output = leaf_splits->weight(); From 38b9ab19db2acf6fb9b2bd73a6dadc0ca5b6349a Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 10:59:51 +0100 Subject: [PATCH 04/42] Refactor existing constraints. --- src/treelearner/feature_histogram.hpp | 44 ++++++--- src/treelearner/monotone_constraints.hpp | 119 +++++++++++++++++------ src/treelearner/serial_tree_learner.cpp | 17 +++- 3 files changed, 134 insertions(+), 46 deletions(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 7fa341c67acb..954086459bb2 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -84,7 +84,7 @@ class FeatureHistogram { void FindBestThreshold(double sum_gradient, double sum_hessian, data_size_t num_data, - const ConstraintEntry& constraints, + FeatureConstraint* constraints, double parent_output, SplitInfo* output) { output->default_left = true; @@ -158,7 +158,7 @@ class FeatureHistogram { #define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING #define LAMBDA_ARGUMENTS \ double sum_gradient, double sum_hessian, data_size_t num_data, \ - const ConstraintEntry &constraints, double parent_output, SplitInfo *output + FeatureConstraint* constraints, double parent_output, SplitInfo *output #define BEFORE_ARGUMENTS sum_gradient, sum_hessian, parent_output, num_data, output, &rand_threshold #define FUNC_ARGUMENTS \ sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, \ @@ -278,7 +278,7 @@ class FeatureHistogram { void FindBestThresholdCategoricalInner(double sum_gradient, double sum_hessian, data_size_t num_data, - const ConstraintEntry& constraints, + FeatureConstraint* constraints, double parent_output, SplitInfo* output) { is_splittable_ = false; @@ -288,6 +288,7 @@ class FeatureHistogram { double best_sum_left_gradient = 0; double best_sum_left_hessian = 0; double gain_shift; + constraints->InitCumulativeConstraints(true); if (USE_SMOOTHING) { gain_shift = GetLeafGainGivenOutput( sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, parent_output); @@ -474,14 +475,14 @@ class FeatureHistogram { output->left_output = CalculateSplittedLeafOutput( best_sum_left_gradient, best_sum_left_hessian, meta_->config->lambda_l1, l2, meta_->config->max_delta_step, - constraints, meta_->config->path_smooth, best_left_count, parent_output); + constraints->LeftToBasicConstraint(), meta_->config->path_smooth, best_left_count, parent_output); output->left_count = best_left_count; output->left_sum_gradient = best_sum_left_gradient; output->left_sum_hessian = best_sum_left_hessian - kEpsilon; output->right_output = CalculateSplittedLeafOutput( sum_gradient - best_sum_left_gradient, sum_hessian - best_sum_left_hessian, meta_->config->lambda_l1, l2, - meta_->config->max_delta_step, constraints, meta_->config->path_smooth, + meta_->config->max_delta_step, constraints->RightToBasicConstraint(), meta_->config->path_smooth, num_data - best_left_count, parent_output); output->right_count = num_data - best_left_count; output->right_sum_gradient = sum_gradient - best_sum_left_gradient; @@ -763,7 +764,7 @@ class FeatureHistogram { template static double CalculateSplittedLeafOutput( double sum_gradients, double sum_hessians, double l1, double l2, - double max_delta_step, const ConstraintEntry& constraints, + double max_delta_step, const BasicConstraint constraints, double smoothing, data_size_t num_data, double parent_output) { double ret = CalculateSplittedLeafOutput( sum_gradients, sum_hessians, l1, l2, max_delta_step, smoothing, num_data, parent_output); @@ -784,7 +785,7 @@ class FeatureHistogram { double sum_right_gradients, double sum_right_hessians, double l1, double l2, double max_delta_step, - const ConstraintEntry& constraints, + FeatureConstraint* constraints, int8_t monotone_constraint, double smoothing, data_size_t left_count, @@ -803,11 +804,11 @@ class FeatureHistogram { double left_output = CalculateSplittedLeafOutput( sum_left_gradients, sum_left_hessians, l1, l2, max_delta_step, - constraints, smoothing, left_count, parent_output); + constraints->LeftToBasicConstraint(), smoothing, left_count, parent_output); double right_output = CalculateSplittedLeafOutput( sum_right_gradients, sum_right_hessians, l1, l2, max_delta_step, - constraints, smoothing, right_count, parent_output); + constraints->RightToBasicConstraint(), smoothing, right_count, parent_output); if (((monotone_constraint > 0) && (left_output > right_output)) || ((monotone_constraint < 0) && (left_output < right_output))) { return 0; @@ -850,11 +851,12 @@ class FeatureHistogram { } } + // FIXME Make constraints const again and have the cumulative constraints outside of it? template void FindBestThresholdSequentially(double sum_gradient, double sum_hessian, data_size_t num_data, - const ConstraintEntry& constraints, + FeatureConstraint* constraints, double min_gain_shift, SplitInfo* output, int rand_threshold, double parent_output) { const int8_t offset = meta_->offset; @@ -864,6 +866,13 @@ class FeatureHistogram { data_size_t best_left_count = 0; uint32_t best_threshold = static_cast(meta_->num_bin); const double cnt_factor = num_data / sum_hessian; + BasicConstraint best_right_constraints; + BasicConstraint best_left_constraints; + + bool constraint_update_necessary = + constraints->ConstraintDifferentDependingOnThreshold(); + constraints->InitCumulativeConstraints(REVERSE); + if (REVERSE) { double sum_right_gradient = 0.0f; double sum_right_hessian = kEpsilon; @@ -910,6 +919,11 @@ class FeatureHistogram { continue; } } + + if (constraint_update_necessary) { + constraints->Update(t + offset); + } + // current split gain double current_gain = GetSplitGains( sum_left_gradient, sum_left_hessian, sum_right_gradient, @@ -932,6 +946,8 @@ class FeatureHistogram { // left is <= threshold, right is > threshold. so this is t-1 best_threshold = static_cast(t - 1 + offset); best_gain = current_gain; + best_right_constraints = (constraints->RightToBasicConstraint()); + best_left_constraints = (constraints->LeftToBasicConstraint()); } } } else { @@ -1016,6 +1032,8 @@ class FeatureHistogram { best_sum_left_hessian = sum_left_hessian; best_threshold = static_cast(t + offset); best_gain = current_gain; + best_right_constraints = (constraints->RightToBasicConstraint()); + best_left_constraints = (constraints->LeftToBasicConstraint()); } } } @@ -1027,7 +1045,7 @@ class FeatureHistogram { CalculateSplittedLeafOutput( best_sum_left_gradient, best_sum_left_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, - meta_->config->max_delta_step, constraints, meta_->config->path_smooth, + meta_->config->max_delta_step, best_left_constraints, meta_->config->path_smooth, best_left_count, parent_output); output->left_count = best_left_count; output->left_sum_gradient = best_sum_left_gradient; @@ -1037,7 +1055,7 @@ class FeatureHistogram { sum_gradient - best_sum_left_gradient, sum_hessian - best_sum_left_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step, - constraints, meta_->config->path_smooth, num_data - best_left_count, + best_right_constraints, meta_->config->path_smooth, num_data - best_left_count, parent_output); output->right_count = num_data - best_left_count; output->right_sum_gradient = sum_gradient - best_sum_left_gradient; @@ -1053,7 +1071,7 @@ class FeatureHistogram { hist_t* data_; bool is_splittable_ = true; - std::function find_best_threshold_fun_; }; diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 47e6fd707fa0..e17ce0ccecb7 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -16,22 +16,60 @@ namespace LightGBM { -struct ConstraintEntry { +class LeafConstraintsBase; + +struct BasicConstraint { double min = -std::numeric_limits::max(); double max = std::numeric_limits::max(); - ConstraintEntry() {} + BasicConstraint(double min, double max) : min(min), max(max) {} + + BasicConstraint() = default; +}; + +struct FeatureConstraint { + virtual void InitCumulativeConstraints(bool) {}; + virtual void Update(int) {}; + virtual BasicConstraint LeftToBasicConstraint() const = 0; + virtual BasicConstraint RightToBasicConstraint() const = 0; + virtual bool ConstraintDifferentDependingOnThreshold() const = 0; +}; + +struct ConstraintEntry { + virtual void Reset() = 0; + virtual void UpdateMin(double new_min) = 0; + virtual void UpdateMax(double new_max) = 0; + virtual bool UpdateMinAndReturnBoolIfChanged(double new_min) = 0; + virtual bool UpdateMaxAndReturnBoolIfChanged(double new_max) = 0; + virtual ConstraintEntry *clone() const = 0; + + virtual void RecomputeConstraintsIfNeeded(LeafConstraintsBase *, int, int, + uint32_t) {}; + + virtual FeatureConstraint *GetFeatureConstraint(int feature_index) = 0; +}; + +// used by both BasicLeafConstraints and IntermediateLeafConstraints +struct BasicConstraintEntry : ConstraintEntry, + FeatureConstraint, + BasicConstraint { - void Reset() { + bool ConstraintDifferentDependingOnThreshold() const final { return false; } + + BasicConstraintEntry *clone() const final { + return new BasicConstraintEntry(*this); + }; + + void Reset() final { min = -std::numeric_limits::max(); max = std::numeric_limits::max(); } - void UpdateMin(double new_min) { min = std::max(new_min, min); } + void UpdateMin(double new_min) final { min = std::max(new_min, min); } - void UpdateMax(double new_max) { max = std::min(new_max, max); } + void UpdateMax(double new_max) final { max = std::min(new_max, max); } - bool UpdateMinAndReturnBoolIfChanged(double new_min) { + bool UpdateMinAndReturnBoolIfChanged(double new_min) final { if (new_min > min) { min = new_min; return true; @@ -39,19 +77,26 @@ struct ConstraintEntry { return false; } - bool UpdateMaxAndReturnBoolIfChanged(double new_max) { + bool UpdateMaxAndReturnBoolIfChanged(double new_max) final { if (new_max < max) { max = new_max; return true; } return false; } + + BasicConstraint LeftToBasicConstraint() const final { return *this; } + + BasicConstraint RightToBasicConstraint() const final { return *this; } + + FeatureConstraint *GetFeatureConstraint(int) final { return this; } }; class LeafConstraintsBase { public: virtual ~LeafConstraintsBase() {} - virtual const ConstraintEntry& Get(int leaf_idx) const = 0; + virtual const ConstraintEntry* Get(int leaf_idx) = 0; + virtual FeatureConstraint* GetFeatureConstraint(int leaf_idx, int feature_index) = 0; virtual void Reset() = 0; virtual void BeforeSplit(int leaf, int new_leaf, int8_t monotone_type) = 0; @@ -61,7 +106,11 @@ class LeafConstraintsBase { double left_output, int split_feature, const SplitInfo& split_info, const std::vector& best_split_per_leaf) = 0; - inline static LeafConstraintsBase* Create(const Config* config, int num_leaves); + virtual void RecomputeConstraintsIfNeeded( + LeafConstraintsBase *constraints_, + int feature_for_constraint, int leaf_idx, uint32_t it_end) = 0; + + inline static LeafConstraintsBase* Create(const Config* config, int num_leaves, int num_features); double ComputeMonotoneSplitGainPenalty(int leaf_index, double penalization) { int depth = tree_->leaf_depth(leaf_index); @@ -85,40 +134,52 @@ class LeafConstraintsBase { class BasicLeafConstraints : public LeafConstraintsBase { public: explicit BasicLeafConstraints(int num_leaves) : num_leaves_(num_leaves) { - entries_.resize(num_leaves_); + for (int i = 0; i < num_leaves; i++) { + entries_.push_back(new BasicConstraintEntry()); + } } void Reset() override { - for (auto& entry : entries_) { - entry.Reset(); + for (auto entry : entries_) { + entry->Reset(); } } + void RecomputeConstraintsIfNeeded( + LeafConstraintsBase* constraints_, + int feature_for_constraint, int leaf_idx, uint32_t it_end) { + entries_[~leaf_idx]->RecomputeConstraintsIfNeeded(constraints_, feature_for_constraint, leaf_idx, it_end); + } + void BeforeSplit(int, int, int8_t) override {} std::vector Update(bool is_numerical_split, int leaf, int new_leaf, int8_t monotone_type, double right_output, double left_output, int, const SplitInfo& , const std::vector&) override { - entries_[new_leaf] = entries_[leaf]; + entries_[new_leaf] = entries_[leaf]->clone(); if (is_numerical_split) { double mid = (left_output + right_output) / 2.0f; if (monotone_type < 0) { - entries_[leaf].UpdateMin(mid); - entries_[new_leaf].UpdateMax(mid); + entries_[leaf]->UpdateMin(mid); + entries_[new_leaf]->UpdateMax(mid); } else if (monotone_type > 0) { - entries_[leaf].UpdateMax(mid); - entries_[new_leaf].UpdateMin(mid); + entries_[leaf]->UpdateMax(mid); + entries_[new_leaf]->UpdateMin(mid); } } return std::vector(); } - const ConstraintEntry& Get(int leaf_idx) const override { return entries_[leaf_idx]; } + const ConstraintEntry* Get(int leaf_idx) override { return entries_[leaf_idx]; } + + FeatureConstraint* GetFeatureConstraint(int leaf_idx, int feature_index) { + return entries_[leaf_idx]->GetFeatureConstraint(feature_index); + } protected: int num_leaves_; - std::vector entries_; + std::vector entries_; }; class IntermediateLeafConstraints : public BasicLeafConstraints { @@ -153,14 +214,14 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { void UpdateConstraintsWithOutputs(bool is_numerical_split, int leaf, int new_leaf, int8_t monotone_type, double right_output, double left_output) { - entries_[new_leaf] = entries_[leaf]; + entries_[new_leaf] = entries_[leaf]->clone(); if (is_numerical_split) { if (monotone_type < 0) { - entries_[leaf].UpdateMin(right_output); - entries_[new_leaf].UpdateMax(left_output); + entries_[leaf]->UpdateMin(right_output); + entries_[new_leaf]->UpdateMax(left_output); } else if (monotone_type > 0) { - entries_[leaf].UpdateMax(right_output); - entries_[new_leaf].UpdateMin(left_output); + entries_[leaf]->UpdateMax(right_output); + entries_[new_leaf]->UpdateMin(left_output); } } } @@ -169,7 +230,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { int new_leaf, int8_t monotone_type, double right_output, double left_output, int split_feature, const SplitInfo& split_info, - const std::vector& best_split_per_leaf) override { + const std::vector& best_split_per_leaf) final { leaves_to_update_.clear(); if (leaf_is_in_monotone_subtree_[leaf]) { UpdateConstraintsWithOutputs(is_numerical_split, leaf, new_leaf, @@ -353,10 +414,10 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { // depending on which split made the current leaf and the original leaves contiguous, // either the min constraint or the max constraint of the current leaf need to be updated if (!update_max_constraints) { - something_changed = entries_[leaf_idx].UpdateMinAndReturnBoolIfChanged( + something_changed = entries_[leaf_idx]->UpdateMinAndReturnBoolIfChanged( min_max_constraints.second); } else { - something_changed = entries_[leaf_idx].UpdateMaxAndReturnBoolIfChanged( + something_changed = entries_[leaf_idx]->UpdateMaxAndReturnBoolIfChanged( min_max_constraints.first); } // If constraints were not updated, then there is no need to update the leaf @@ -456,7 +517,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { return std::pair(keep_going_left, keep_going_right); } - private: + protected: const Config* config_; std::vector leaves_to_update_; // add parent node information @@ -466,7 +527,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { }; LeafConstraintsBase* LeafConstraintsBase::Create(const Config* config, - int num_leaves) { + int num_leaves, int num_features) { if (config->monotone_constraints_method == "intermediate") { return new IntermediateLeafConstraints(config, num_leaves); } diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 00386eb52c88..0853d32380dc 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -46,7 +46,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian // push split information for all leaves best_split_per_leaf_.resize(config_->num_leaves); - constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves)); + constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features())); // initialize splits for leaf smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data())); @@ -146,7 +146,7 @@ void SerialTreeLearner::ResetConfig(const Config* config) { } cegb_->Init(); } - constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves)); + constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features())); } Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians) { @@ -711,20 +711,29 @@ void SerialTreeLearner::ComputeBestSplitForFeature( FeatureHistogram* histogram_array_, int feature_index, int real_fidx, bool is_feature_used, int num_data, const LeafSplits* leaf_splits, SplitInfo* best_split) { + + bool is_feature_numerical = train_data_->FeatureBinMapper(feature_index) + ->bin_type() == BinType::NumericalBin; + if (is_feature_numerical) { + constraints_->RecomputeConstraintsIfNeeded( + constraints_.get(), feature_index, ~(leaf_splits->leaf_index()), + train_data_->FeatureNumBin(feature_index)); + } + SplitInfo new_split; double parent_output; if (leaf_splits->leaf_index() == 0) { // for root leaf the "parent" output is its own output because we don't apply any smoothing to the root parent_output = FeatureHistogram::CalculateSplittedLeafOutput( leaf_splits->sum_gradients(), leaf_splits->sum_hessians(), config_->lambda_l1, - config_->lambda_l2, config_->max_delta_step, ConstraintEntry(), + config_->lambda_l2, config_->max_delta_step, BasicConstraint(), config_->path_smooth, static_cast(num_data), 0); } else { parent_output = leaf_splits->weight(); } histogram_array_[feature_index].FindBestThreshold( leaf_splits->sum_gradients(), leaf_splits->sum_hessians(), num_data, - constraints_->Get(leaf_splits->leaf_index()), parent_output, &new_split); + constraints_->GetFeatureConstraint(leaf_splits->leaf_index(), feature_index), parent_output, &new_split); new_split.feature = real_fidx; if (cegb_ != nullptr) { new_split.gain -= From 447eb3b24c406861b9e1e8e162fe1cdeafaf5d42 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 10:59:57 +0100 Subject: [PATCH 05/42] Add advanced constraints method. --- src/treelearner/monotone_constraints.hpp | 646 +++++++++++++++++++++++ 1 file changed, 646 insertions(+) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index e17ce0ccecb7..f0ba8a00550a 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -92,6 +92,236 @@ struct BasicConstraintEntry : ConstraintEntry, FeatureConstraint *GetFeatureConstraint(int) final { return this; } }; +struct FeatureMinOrMaxConstraints { + std::vector constraints; + // the constraint number i is valid on the slice + // [thresholds[i]:threshold[i+1]) + // if threshold[i+1] does not exist, then it is valid for thresholds following + // threshold[i] + std::vector thresholds; + + FeatureMinOrMaxConstraints() { + constraints.reserve(32); + thresholds.reserve(32); + } + + size_t Size() const { return thresholds.size(); } + + FeatureMinOrMaxConstraints(double extremum) { + constraints.reserve(32); + thresholds.reserve(32); + + constraints.push_back(extremum); + thresholds.push_back(0); + } + + void Reset(double extremum) { + constraints.resize(1); + constraints[0] = extremum; + thresholds.resize(1); + thresholds[0] = 0; + } + + void UpdateMin(double min) { + for (unsigned int j = 0; j < constraints.size(); j++) { + if (min > constraints[j]) { + constraints[j] = min; + } + } + } + + void UpdateMax(double max) { + for (unsigned int j = 0; j < constraints.size(); j++) { + if (max < constraints[j]) { + constraints[j] = max; + } + } + } +}; + +struct CumulativeFeatureConstraint { + std::vector thresholds_min_constraints; + std::vector thresholds_max_constraints; + std::vector cumulative_min_constraints_left_to_right; + std::vector cumulative_min_constraints_right_to_left; + std::vector cumulative_max_constraints_left_to_right; + std::vector cumulative_max_constraints_right_to_left; + unsigned int index_min_constraints_left_to_right; + unsigned int index_min_constraints_right_to_left; + unsigned int index_max_constraints_left_to_right; + unsigned int index_max_constraints_right_to_left; + + static void CumulativeExtremum( + const double &(*extremum_function)(const double &, const double &), + bool is_direction_from_left_to_right, + std::vector &cumulative_extremum) { + if (cumulative_extremum.size() == 1) { + return; + } + +#ifdef DEBUG + CHECK(cumulative_extremum.size() != 0); +#endif + + std::size_t n_exts = cumulative_extremum.size(); + int step = is_direction_from_left_to_right ? 1 : -1; + std::size_t start = is_direction_from_left_to_right ? 0 : n_exts - 1; + std::size_t end = is_direction_from_left_to_right ? n_exts - 1 : 0; + + for (auto i = start; i != end; i = i + step) { + cumulative_extremum[i + step] = extremum_function( + cumulative_extremum[i + step], cumulative_extremum[i]); + } + } + + CumulativeFeatureConstraint() = default; + + CumulativeFeatureConstraint(FeatureMinOrMaxConstraints min_constraints, + FeatureMinOrMaxConstraints max_constraints, + bool REVERSE) { + thresholds_min_constraints = min_constraints.thresholds; + thresholds_max_constraints = max_constraints.thresholds; + cumulative_min_constraints_left_to_right = min_constraints.constraints; + cumulative_min_constraints_right_to_left = min_constraints.constraints; + cumulative_max_constraints_left_to_right = max_constraints.constraints; + cumulative_max_constraints_right_to_left = max_constraints.constraints; + + const double &(*min)(const double &, const double &) = std::min; + const double &(*max)(const double &, const double &) = std::max; + CumulativeExtremum(max, true, cumulative_min_constraints_left_to_right); + CumulativeExtremum(max, false, cumulative_min_constraints_right_to_left); + CumulativeExtremum(min, true, cumulative_max_constraints_left_to_right); + CumulativeExtremum(min, false, cumulative_max_constraints_right_to_left); + + if (REVERSE) { + index_min_constraints_left_to_right = + thresholds_min_constraints.size() - 1; + index_min_constraints_right_to_left = + thresholds_min_constraints.size() - 1; + index_max_constraints_left_to_right = + thresholds_max_constraints.size() - 1; + index_max_constraints_right_to_left = + thresholds_max_constraints.size() - 1; + } else { + index_min_constraints_left_to_right = 0; + index_min_constraints_right_to_left = 0; + index_max_constraints_left_to_right = 0; + index_max_constraints_right_to_left = 0; + } + } + + void Update(int threshold) { + while ( + static_cast( + thresholds_min_constraints[index_min_constraints_left_to_right]) > + threshold - 1) { + index_min_constraints_left_to_right -= 1; + } + while ( + static_cast( + thresholds_min_constraints[index_min_constraints_right_to_left]) > + threshold) { + index_min_constraints_right_to_left -= 1; + } + while ( + static_cast( + thresholds_max_constraints[index_max_constraints_left_to_right]) > + threshold - 1) { + index_max_constraints_left_to_right -= 1; + } + while ( + static_cast( + thresholds_max_constraints[index_max_constraints_right_to_left]) > + threshold) { + index_max_constraints_right_to_left -= 1; + } + } + + double GetRightMin() const { + return cumulative_min_constraints_right_to_left + [index_min_constraints_right_to_left]; + } + double GetRightMax() const { + return cumulative_max_constraints_right_to_left + [index_max_constraints_right_to_left]; + } + double GetLeftMin() const { + return cumulative_min_constraints_left_to_right + [index_min_constraints_left_to_right]; + } + double GetLeftMax() const { + return cumulative_max_constraints_left_to_right + [index_max_constraints_left_to_right]; + } +}; + +struct AdvancedFeatureConstraints : FeatureConstraint { + FeatureMinOrMaxConstraints min_constraints; + FeatureMinOrMaxConstraints max_constraints; + CumulativeFeatureConstraint cumulative_feature_constraint; + bool min_constraints_to_be_recomputed = false; + bool max_constraints_to_be_recomputed = false; + + void InitCumulativeConstraints(bool REVERSE) final { + cumulative_feature_constraint = + CumulativeFeatureConstraint(min_constraints, max_constraints, REVERSE); + } + + void Update(int threshold) final { + cumulative_feature_constraint.Update(threshold); + } + + FeatureMinOrMaxConstraints &GetMinConstraints() { return min_constraints; } + + FeatureMinOrMaxConstraints &GetMaxConstraints() { return max_constraints; } + + bool ConstraintDifferentDependingOnThreshold() const final { + return min_constraints.Size() > 1 || max_constraints.Size() > 1; + } + + BasicConstraint RightToBasicConstraint() const final { + return BasicConstraint(cumulative_feature_constraint.GetRightMin(), + cumulative_feature_constraint.GetRightMax()); + } + + BasicConstraint LeftToBasicConstraint() const final { + return BasicConstraint(cumulative_feature_constraint.GetLeftMin(), + cumulative_feature_constraint.GetLeftMax()); + } + + void Reset() { + min_constraints.Reset(-std::numeric_limits::max()); + max_constraints.Reset(std::numeric_limits::max()); + } + + void UpdateMax(double new_max, bool trigger_a_recompute) { + if (trigger_a_recompute) { + max_constraints_to_be_recomputed = true; + } + max_constraints.UpdateMax(new_max); + } + + bool FeatureMaxConstraintsToBeUpdated() { + return max_constraints_to_be_recomputed; + } + + bool FeatureMinConstraintsToBeUpdated() { + return min_constraints_to_be_recomputed; + } + + void ResetUpdates() { + min_constraints_to_be_recomputed = false; + max_constraints_to_be_recomputed = false; + } + + void UpdateMin(double new_min, bool trigger_a_recompute) { + if (trigger_a_recompute) { + min_constraints_to_be_recomputed = true; + } + min_constraints.UpdateMin(new_min); + } +}; + class LeafConstraintsBase { public: virtual ~LeafConstraintsBase() {} @@ -106,6 +336,14 @@ class LeafConstraintsBase { double left_output, int split_feature, const SplitInfo& split_info, const std::vector& best_split_per_leaf) = 0; + virtual void GoUpToFindConstrainingLeaves( + int, int, + std::vector &, + std::vector &, + std::vector &, + FeatureMinOrMaxConstraints &, bool , + uint32_t, uint32_t, uint32_t) {} + virtual void RecomputeConstraintsIfNeeded( LeafConstraintsBase *constraints_, int feature_for_constraint, int leaf_idx, uint32_t it_end) = 0; @@ -131,6 +369,95 @@ class LeafConstraintsBase { const Tree* tree_; }; +// used by AdvancedLeafConstraints +struct AdvancedConstraintEntry : ConstraintEntry { + std::vector constraints; + + AdvancedConstraintEntry *clone() const final { + return new AdvancedConstraintEntry(*this); + }; + + void RecomputeConstraintsIfNeeded(LeafConstraintsBase *constraints_, + int feature_for_constraint, int leaf_idx, + uint32_t it_end) final { + if (constraints[feature_for_constraint] + .FeatureMinConstraintsToBeUpdated() || + constraints[feature_for_constraint] + .FeatureMaxConstraintsToBeUpdated()) { + FeatureMinOrMaxConstraints &constraints_to_be_updated = + constraints[feature_for_constraint].FeatureMinConstraintsToBeUpdated() + ? constraints[feature_for_constraint].GetMinConstraints() + : constraints[feature_for_constraint].GetMaxConstraints(); + + constraints_to_be_updated.Reset( + constraints[feature_for_constraint].FeatureMinConstraintsToBeUpdated() + ? -std::numeric_limits::max() + : std::numeric_limits::max()); + + std::vector features_of_splits_going_up_from_original_leaf = + std::vector(); + std::vector thresholds_of_splits_going_up_from_original_leaf = + std::vector(); + std::vector was_original_leaf_right_child_of_split = + std::vector(); + constraints_->GoUpToFindConstrainingLeaves( + feature_for_constraint, leaf_idx, + features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split, constraints_to_be_updated, + constraints[feature_for_constraint] + .FeatureMinConstraintsToBeUpdated(), + 0, it_end, it_end); + constraints[feature_for_constraint].ResetUpdates(); + } + } + + // for each feature, an array of constraints needs to be stored + AdvancedConstraintEntry(unsigned int num_features) { + constraints.resize(num_features); + } + + void Reset() final { + for (unsigned int i = 0; i < constraints.size(); i++) { + constraints[i].Reset(); + } + } + + void UpdateMin(double new_min) final { + for (unsigned int i = 0; i < constraints.size(); i++) { + constraints[i].UpdateMin(new_min, false); + } + } + + void UpdateMax(double new_max) final { + for (unsigned int i = 0; i < constraints.size(); i++) { + constraints[i].UpdateMax(new_max, false); + } + } + + bool UpdateMinAndReturnBoolIfChanged(double new_min) final { + for (unsigned int i = 0; i < constraints.size(); i++) { + constraints[i].UpdateMin(new_min, true); + } + // even if nothing changed, this could have been unconstrained so it needs + // to be recomputed from the beginning + return true; + } + + bool UpdateMaxAndReturnBoolIfChanged(double new_max) final { + for (unsigned int i = 0; i < constraints.size(); i++) { + constraints[i].UpdateMax(new_max, true); + } + // even if nothing changed, this could have been unconstrained so it needs + // to be recomputed from the beginning + return true; + } + + FeatureConstraint *GetFeatureConstraint(int feature_index) final { + return &constraints[feature_index]; + } +}; + class BasicLeafConstraints : public LeafConstraintsBase { public: explicit BasicLeafConstraints(int num_leaves) : num_leaves_(num_leaves) { @@ -526,11 +853,330 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { std::vector leaf_is_in_monotone_subtree_; }; +class AdvancedLeafConstraints : public IntermediateLeafConstraints { +public: + AdvancedLeafConstraints(const Config *config, int num_leaves, + int num_features) + : IntermediateLeafConstraints(config, num_leaves) { + for (int i = 0; i < num_leaves; i++) { + entries_[i] = new AdvancedConstraintEntry(num_features); + } + } + + // at any point in time, for an index i, the constraint constraint[i] has to + // be valid on [threshold[i]: threshold[i + 1]) (or [threshold[i]: +inf) if i + // is the last index of the array) + void UpdateConstraints(FeatureMinOrMaxConstraints &feature_constraint, + double extremum, uint32_t it_start, uint32_t it_end, + bool use_max_operator, uint32_t last_threshold) { + bool start_done = false; + bool end_done = false; + // previous constraint have to be tracked + // for example when adding a constraints cstr2 on thresholds [1:2), + // on an existing constraints cstr1 on thresholds [0, +inf), + // the thresholds and constraints must become + // [0, 1, 2] and [cstr1, cstr2, cstr1] + // so since we loop through thresholds only once, + // the previous constraint that still applies needs to be recorded + double previous_constraint; + double current_constraint; + for (unsigned int i = 0; i < feature_constraint.thresholds.size();) { + current_constraint = feature_constraint.constraints[i]; + // easy case when the thresholds match + if (feature_constraint.thresholds[i] == it_start) { + feature_constraint.constraints[i] = + (use_max_operator) + ? std::max(extremum, feature_constraint.constraints[i]) + : std::min(extremum, feature_constraint.constraints[i]); + start_done = true; + } + if (feature_constraint.thresholds[i] > it_start) { + // existing constraint is updated if there is a need for it + if (feature_constraint.thresholds[i] < it_end) { + feature_constraint.constraints[i] = + (use_max_operator) + ? std::max(extremum, feature_constraint.constraints[i]) + : std::min(extremum, feature_constraint.constraints[i]); + } + // when thresholds don't match, a new threshold + // and a new constraint may need to be inserted + if (!start_done) { + start_done = true; + if ((use_max_operator && extremum > previous_constraint) || + (!use_max_operator && extremum < previous_constraint)) { + feature_constraint.constraints.insert( + feature_constraint.constraints.begin() + i, extremum); + feature_constraint.thresholds.insert( + feature_constraint.thresholds.begin() + i, it_start); + i += 1; + } + } + } + // easy case when the end thresholds match + if (feature_constraint.thresholds[i] == it_end) { + end_done = true; + i += 1; + break; + } + // if they don't then, the previous constraint needs to be added back + // where the current one ends + if (feature_constraint.thresholds[i] > it_end) { + if (i != 0 && + previous_constraint != feature_constraint.constraints[i - 1]) { + feature_constraint.constraints.insert( + feature_constraint.constraints.begin() + i, previous_constraint); + feature_constraint.thresholds.insert( + feature_constraint.thresholds.begin() + i, it_end); + } + end_done = true; + i += 1; + break; + } + // If 2 successive constraints are the same then the second one may as + // well be deleted + if (i != 0 && feature_constraint.constraints[i] == + feature_constraint.constraints[i - 1]) { + feature_constraint.constraints.erase( + feature_constraint.constraints.begin() + i); + feature_constraint.thresholds.erase( + feature_constraint.thresholds.begin() + i); + previous_constraint = current_constraint; + i -= 1; + } + previous_constraint = current_constraint; + i += 1; + } + // if the loop didn't get to an index greater than it_start, it needs to be + // added at the end + if (!start_done) { + if ((use_max_operator && + extremum > feature_constraint.constraints.back()) || + (!use_max_operator && + extremum < feature_constraint.constraints.back())) { + feature_constraint.constraints.push_back(extremum); + feature_constraint.thresholds.push_back(it_start); + } else { + end_done = true; + } + } + // if we didn't get to an index after it_end, then the previous constraint + // needs to be set back, unless it_end goes up to the last bin of the feature + if (!end_done && it_end != last_threshold && + previous_constraint != feature_constraint.constraints.back()) { + feature_constraint.constraints.push_back(previous_constraint); + feature_constraint.thresholds.push_back(it_end); + } + } + + // this function is called only when computing constraints when the monotone + // precise mode is set to true + // it makes sure that it is worth it to visit a branch, as it could + // not contain any relevant constraint (for example if the a branch + // with bigger values is also constraining the original leaf, then + // it is useless to visit the branch with smaller values) + std::pair + LeftRightContainsRelevantInformation(bool min_constraints_to_be_updated, + int feature, + bool split_feature_is_inner_feature) { + if (split_feature_is_inner_feature) { + return std::pair(true, true); + } + int8_t monotone_type = config_->monotone_constraints[feature]; + if (monotone_type == 0) { + return std::pair(true, true); + } + if ((monotone_type == -1 && min_constraints_to_be_updated) || + (monotone_type == 1 && !min_constraints_to_be_updated)) { + return std::pair(true, false); + } + if ((monotone_type == 1 && min_constraints_to_be_updated) || + (monotone_type == -1 && !min_constraints_to_be_updated)) { + return std::pair(false, true); + } + } + + // this function goes down in a subtree to find the + // constraints that would apply on the original leaf + void GoDownToFindConstrainingLeaves( + int feature_for_constraint, int root_monotone_feature, int node_idx, + bool min_constraints_to_be_updated, uint32_t it_start, uint32_t it_end, + const std::vector &features_of_splits_going_up_from_original_leaf, + const std::vector & + thresholds_of_splits_going_up_from_original_leaf, + const std::vector &was_original_leaf_right_child_of_split, + FeatureMinOrMaxConstraints &feature_constraint, uint32_t last_threshold) { + double extremum; + // if leaf, then constraints need to be updated according to its value + if (node_idx < 0) { + extremum = tree_->LeafOutput(~node_idx); +#ifdef DEBUG + CHECK(it_start < it_end); +#endif + UpdateConstraints(feature_constraint, extremum, it_start, it_end, + min_constraints_to_be_updated, last_threshold); + } else { // if node, keep going down the tree + // check if the children are contiguous to the original leaf and therefore + // potentially constraining + std::pair keep_going_left_right = ShouldKeepGoingLeftRight( + node_idx, features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split); + int inner_feature = tree_->split_feature_inner(node_idx); + int feature = tree_->split_feature(node_idx); + uint32_t threshold = tree_->threshold_in_bin(node_idx); + + bool split_feature_is_inner_feature = + (inner_feature == feature_for_constraint); + bool split_feature_is_monotone_feature = + (root_monotone_feature == feature_for_constraint); + // make sure that both children contain values that could + // potentially help determine the true constraints for the original leaf + std::pair left_right_contain_relevant_information = + LeftRightContainsRelevantInformation( + min_constraints_to_be_updated, feature, + split_feature_is_inner_feature && + !split_feature_is_monotone_feature); + // if both children are contiguous to the original leaf + // but one contains values greater than the other + // then no need to go down in both + if (keep_going_left_right.first && + (left_right_contain_relevant_information.first || + !keep_going_left_right.second)) { + // update thresholds based on going left + uint32_t new_it_end = split_feature_is_inner_feature + ? std::min(threshold + 1, it_end) + : it_end; + GoDownToFindConstrainingLeaves( + feature_for_constraint, root_monotone_feature, + tree_->left_child(node_idx), min_constraints_to_be_updated, + it_start, new_it_end, + features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split, feature_constraint, + last_threshold); + } + if (keep_going_left_right.second && + (left_right_contain_relevant_information.second || + !keep_going_left_right.first)) { + // update thresholds based on going right + uint32_t new_it_start = split_feature_is_inner_feature + ? std::max(threshold + 1, it_start) + : it_start; + GoDownToFindConstrainingLeaves( + feature_for_constraint, root_monotone_feature, + tree_->right_child(node_idx), min_constraints_to_be_updated, + new_it_start, it_end, + features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split, feature_constraint, + last_threshold); + } + } + } + + // this function is only used if the monotone precise mode is enabled + // it recursively goes up the tree then down to find leaf that + // are constraining the current leaf + void GoUpToFindConstrainingLeaves( + int feature_for_constraint, int node_idx, + std::vector &features_of_splits_going_up_from_original_leaf, + std::vector &thresholds_of_splits_going_up_from_original_leaf, + std::vector &was_original_leaf_right_child_of_split, + FeatureMinOrMaxConstraints &feature_constraint, + bool min_constraints_to_be_updated, uint32_t it_start, uint32_t it_end, + uint32_t last_threshold) final { + int parent_idx = + (node_idx < 0) ? tree_->leaf_parent(~node_idx) : node_parent_[node_idx]; + // if not at the root + if (parent_idx != -1) { + int inner_feature = tree_->split_feature_inner(parent_idx); + int feature = tree_->split_feature(parent_idx); + int8_t monotone_type = config_->monotone_constraints[feature]; + bool is_in_right_child = tree_->right_child(parent_idx) == node_idx; + bool is_split_numerical = tree_->IsNumericalSplit(parent_idx); + uint32_t threshold = tree_->threshold_in_bin(parent_idx); + + // by going up, more information about the position of the + // original leaf are gathered so the starting and ending + // thresholds can be updated, which will save some time later + if ((feature_for_constraint == inner_feature) && is_split_numerical) { + if (is_in_right_child) { + it_start = std::max(threshold, it_start); + } else { + it_end = std::min(threshold + 1, it_end); + } +#ifdef DEBUG + CHECK(it_start < it_end); +#endif + } + + // this is just an optimisation not to waste time going down in subtrees + // where there won't be any new constraining leaf + bool opposite_child_necessary_to_update_constraints = + OppositeChildShouldBeUpdated( + is_split_numerical, + features_of_splits_going_up_from_original_leaf, inner_feature, + was_original_leaf_right_child_of_split, is_in_right_child); + + if (opposite_child_necessary_to_update_constraints) { + // if there is no monotone constraint on a split, + // then there is no relationship between its left and right leaves' + // values + if (monotone_type != 0) { + int left_child_idx = tree_->left_child(parent_idx); + int right_child_idx = tree_->right_child(parent_idx); + bool left_child_is_curr_idx = (left_child_idx == node_idx); + + bool update_min_constraints_in_curr_child_leaf = + (monotone_type < 0) ? left_child_is_curr_idx + : !left_child_is_curr_idx; + if (update_min_constraints_in_curr_child_leaf == + min_constraints_to_be_updated) { + int opposite_child_idx = + (left_child_is_curr_idx) ? right_child_idx : left_child_idx; + + // go down in the opposite branch to find potential + // constraining leaves + GoDownToFindConstrainingLeaves( + feature_for_constraint, inner_feature, opposite_child_idx, + min_constraints_to_be_updated, it_start, it_end, + features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split, feature_constraint, + last_threshold); + } + } + // if opposite_child_should_be_updated, then it means the path to come + // up there was relevant, + // i.e. that it will be helpful going down to determine which leaf + // is actually contiguous to the original leaf and constraining + // so the variables associated with the split need to be recorded + was_original_leaf_right_child_of_split.push_back(is_in_right_child); + thresholds_of_splits_going_up_from_original_leaf.push_back(threshold); + features_of_splits_going_up_from_original_leaf.push_back(inner_feature); + } + + // since current node is not the root, keep going up + if (parent_idx != 0) { + GoUpToFindConstrainingLeaves( + feature_for_constraint, parent_idx, + features_of_splits_going_up_from_original_leaf, + thresholds_of_splits_going_up_from_original_leaf, + was_original_leaf_right_child_of_split, feature_constraint, + min_constraints_to_be_updated, it_start, it_end, last_threshold); + } + } + } +}; + LeafConstraintsBase* LeafConstraintsBase::Create(const Config* config, int num_leaves, int num_features) { if (config->monotone_constraints_method == "intermediate") { return new IntermediateLeafConstraints(config, num_leaves); } + if (config->monotone_constraints_method == "advanced") { + return new AdvancedLeafConstraints(config, num_leaves, num_features); + } return new BasicLeafConstraints(num_leaves); } From d7e8a9e7c7618e82a7eb443d26ad093f00c4db49 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 10 Jun 2020 11:01:11 +0100 Subject: [PATCH 06/42] Update tests. --- tests/python_package_test/test_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c3d2794e228b..be10e35e8f48 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1202,7 +1202,7 @@ def is_correctly_constrained(learner, x3_to_category=True): for test_with_categorical_variable in [True, False]: trainset = self.generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable) - for monotone_constraints_method in ["basic", "intermediate"]: + for monotone_constraints_method in ["basic", "intermediate", "advanced"]: params = { 'min_data': 20, 'num_leaves': 20, @@ -1236,7 +1236,7 @@ def are_there_monotone_splits(tree, monotone_constraints): monotone_constraints = [1, -1, 0] penalization_parameter = 2.0 trainset = self.generate_trainset_for_monotone_constraints_tests(x3_to_category=False) - for monotone_constraints_method in ["basic", "intermediate"]: + for monotone_constraints_method in ["basic", "intermediate", "advanced"]: params = { 'max_depth': max_depth, 'monotone_constraints': monotone_constraints, @@ -1275,7 +1275,7 @@ def test_monotone_penalty_max(self): unconstrained_model_predictions = unconstrained_model.\ predict(x3_negatively_correlated_with_y.reshape(-1, 1)) - for monotone_constraints_method in ["basic", "intermediate"]: + for monotone_constraints_method in ["basic", "intermediate", "advanced"]: params_constrained_model["monotone_constraints_method"] = monotone_constraints_method # The penalization is so high that the first 2 features should not be used here constrained_model = lgb.train(params_constrained_model, trainset_constrained_model, 10) From 8bee2cb24c2863e9b5250eead36737c651de8307 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 29 Jul 2020 18:32:09 +0100 Subject: [PATCH 07/42] Add override. --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index f0ba8a00550a..3548752e165d 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -474,7 +474,7 @@ class BasicLeafConstraints : public LeafConstraintsBase { void RecomputeConstraintsIfNeeded( LeafConstraintsBase* constraints_, - int feature_for_constraint, int leaf_idx, uint32_t it_end) { + int feature_for_constraint, int leaf_idx, uint32_t it_end) override{ entries_[~leaf_idx]->RecomputeConstraintsIfNeeded(constraints_, feature_for_constraint, leaf_idx, it_end); } From 00293582b8baa36adec74bf727c5a7e1aeef3833 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 31 Jul 2020 08:14:29 +0100 Subject: [PATCH 08/42] linting. --- src/treelearner/monotone_constraints.hpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 3548752e165d..1041aa268c83 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -28,8 +28,8 @@ struct BasicConstraint { }; struct FeatureConstraint { - virtual void InitCumulativeConstraints(bool) {}; - virtual void Update(int) {}; + virtual void InitCumulativeConstraints(bool) {} + virtual void Update(int) {} virtual BasicConstraint LeftToBasicConstraint() const = 0; virtual BasicConstraint RightToBasicConstraint() const = 0; virtual bool ConstraintDifferentDependingOnThreshold() const = 0; @@ -44,7 +44,7 @@ struct ConstraintEntry { virtual ConstraintEntry *clone() const = 0; virtual void RecomputeConstraintsIfNeeded(LeafConstraintsBase *, int, int, - uint32_t) {}; + uint32_t) {} virtual FeatureConstraint *GetFeatureConstraint(int feature_index) = 0; }; @@ -53,7 +53,6 @@ struct ConstraintEntry { struct BasicConstraintEntry : ConstraintEntry, FeatureConstraint, BasicConstraint { - bool ConstraintDifferentDependingOnThreshold() const final { return false; } BasicConstraintEntry *clone() const final { @@ -107,7 +106,7 @@ struct FeatureMinOrMaxConstraints { size_t Size() const { return thresholds.size(); } - FeatureMinOrMaxConstraints(double extremum) { + explicit FeatureMinOrMaxConstraints(double extremum) { constraints.reserve(32); thresholds.reserve(32); @@ -160,7 +159,7 @@ struct CumulativeFeatureConstraint { } #ifdef DEBUG - CHECK(cumulative_extremum.size() != 0); + CHECK_NE(cumulative_extremum.size(), 0); #endif std::size_t n_exts = cumulative_extremum.size(); @@ -413,7 +412,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } // for each feature, an array of constraints needs to be stored - AdvancedConstraintEntry(unsigned int num_features) { + explicit AdvancedConstraintEntry(unsigned int num_features) { constraints.resize(num_features); } @@ -474,7 +473,7 @@ class BasicLeafConstraints : public LeafConstraintsBase { void RecomputeConstraintsIfNeeded( LeafConstraintsBase* constraints_, - int feature_for_constraint, int leaf_idx, uint32_t it_end) override{ + int feature_for_constraint, int leaf_idx, uint32_t it_end) override { entries_[~leaf_idx]->RecomputeConstraintsIfNeeded(constraints_, feature_for_constraint, leaf_idx, it_end); } @@ -1014,7 +1013,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { #endif UpdateConstraints(feature_constraint, extremum, it_start, it_end, min_constraints_to_be_updated, last_threshold); - } else { // if node, keep going down the tree + } else { // if node, keep going down the tree // check if the children are contiguous to the original leaf and therefore // potentially constraining std::pair keep_going_left_right = ShouldKeepGoingLeftRight( From 5acfb14f5018cf9e67a22a032afac91976b6ddc7 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Fri, 7 Aug 2020 14:15:35 +0100 Subject: [PATCH 09/42] Add override. --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 1041aa268c83..2e0ea24a74d9 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -499,7 +499,7 @@ class BasicLeafConstraints : public LeafConstraintsBase { const ConstraintEntry* Get(int leaf_idx) override { return entries_[leaf_idx]; } - FeatureConstraint* GetFeatureConstraint(int leaf_idx, int feature_index) { + FeatureConstraint* GetFeatureConstraint(int leaf_idx, int feature_index) final { return entries_[leaf_idx]->GetFeatureConstraint(feature_index); } From 770f93f21780a713f1a79d85bda91f0d03e8d753 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 11:38:17 +0100 Subject: [PATCH 10/42] Simplify condition in LeftRightContainsRelevantInformation. --- src/treelearner/monotone_constraints.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 2e0ea24a74d9..39e41caa9b5b 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -988,8 +988,10 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { (monotone_type == 1 && !min_constraints_to_be_updated)) { return std::pair(true, false); } - if ((monotone_type == 1 && min_constraints_to_be_updated) || - (monotone_type == -1 && !min_constraints_to_be_updated)) { +// Same as +// if ((monotone_type == 1 && min_constraints_to_be_updated) || +// (monotone_type == -1 && !min_constraints_to_be_updated)) + else { return std::pair(false, true); } } From e1ed79999ed0e4c242a85a6e5a24c2c351184df1 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 12:12:56 +0100 Subject: [PATCH 11/42] Add virtual destructor to FeatureConstraint. --- src/treelearner/monotone_constraints.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 39e41caa9b5b..17ef914713c6 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -33,6 +33,7 @@ struct FeatureConstraint { virtual BasicConstraint LeftToBasicConstraint() const = 0; virtual BasicConstraint RightToBasicConstraint() const = 0; virtual bool ConstraintDifferentDependingOnThreshold() const = 0; + virtual ~FeatureConstraint() {} }; struct ConstraintEntry { From af52340a8cc501bbd68f6d7a5587b8773d028569 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 12:15:27 +0100 Subject: [PATCH 12/42] Remove redundant blank line. --- src/treelearner/monotone_constraints.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 17ef914713c6..20fce6c26886 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -590,7 +590,6 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { int inner_feature, const std::vector& was_original_leaf_right_child_of_split, bool is_in_right_child) { - // if the split is categorical, it is not handled by this optimisation, // so the code will have to go down in the other child subtree to see if // there are leaves to update From 04c53e75d85e836249fc961e38f680844eb8853e Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 12:16:52 +0100 Subject: [PATCH 13/42] linting of else. --- src/treelearner/monotone_constraints.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 20fce6c26886..5657a7babd90 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -610,8 +610,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { } } return true; - } - else { + } else { return false; } } From 2e13eaf45930fc1251a3defd78d9404b9e623371 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 12:19:34 +0100 Subject: [PATCH 14/42] Indentation. --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 5657a7babd90..bf5721b7d3e8 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -852,7 +852,7 @@ class IntermediateLeafConstraints : public BasicLeafConstraints { }; class AdvancedLeafConstraints : public IntermediateLeafConstraints { -public: + public: AdvancedLeafConstraints(const Config *config, int num_leaves, int num_features) : IntermediateLeafConstraints(config, num_leaves) { From b9443b3c149a6269da7c652524f45ff430264452 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 14:06:57 +0100 Subject: [PATCH 15/42] Lint else. --- src/treelearner/monotone_constraints.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index bf5721b7d3e8..5509f559e915 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -986,11 +986,10 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { if ((monotone_type == -1 && min_constraints_to_be_updated) || (monotone_type == 1 && !min_constraints_to_be_updated)) { return std::pair(true, false); - } -// Same as -// if ((monotone_type == 1 && min_constraints_to_be_updated) || -// (monotone_type == -1 && !min_constraints_to_be_updated)) - else { + } else { + // Same as + // if ((monotone_type == 1 && min_constraints_to_be_updated) || + // (monotone_type == -1 && !min_constraints_to_be_updated)) return std::pair(false, true); } } From 12f67d736123e02bd40ebfce00b3cb9355f27d12 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 9 Aug 2020 14:29:03 +0100 Subject: [PATCH 16/42] Replaced non-const reference by pointers. --- src/treelearner/monotone_constraints.hpp | 132 +++++++++++------------ 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 5509f559e915..e1588f965ebc 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -154,23 +154,23 @@ struct CumulativeFeatureConstraint { static void CumulativeExtremum( const double &(*extremum_function)(const double &, const double &), bool is_direction_from_left_to_right, - std::vector &cumulative_extremum) { - if (cumulative_extremum.size() == 1) { + std::vector* cumulative_extremum) { + if (cumulative_extremum->size() == 1) { return; } #ifdef DEBUG - CHECK_NE(cumulative_extremum.size(), 0); + CHECK_NE(cumulative_extremum->size(), 0); #endif - std::size_t n_exts = cumulative_extremum.size(); + std::size_t n_exts = cumulative_extremum->size(); int step = is_direction_from_left_to_right ? 1 : -1; std::size_t start = is_direction_from_left_to_right ? 0 : n_exts - 1; std::size_t end = is_direction_from_left_to_right ? n_exts - 1 : 0; for (auto i = start; i != end; i = i + step) { - cumulative_extremum[i + step] = extremum_function( - cumulative_extremum[i + step], cumulative_extremum[i]); + (*cumulative_extremum)[i + step] = extremum_function( + (*cumulative_extremum)[i + step], (*cumulative_extremum)[i]); } } @@ -188,10 +188,10 @@ struct CumulativeFeatureConstraint { const double &(*min)(const double &, const double &) = std::min; const double &(*max)(const double &, const double &) = std::max; - CumulativeExtremum(max, true, cumulative_min_constraints_left_to_right); - CumulativeExtremum(max, false, cumulative_min_constraints_right_to_left); - CumulativeExtremum(min, true, cumulative_max_constraints_left_to_right); - CumulativeExtremum(min, false, cumulative_max_constraints_right_to_left); + CumulativeExtremum(max, true, &cumulative_min_constraints_left_to_right); + CumulativeExtremum(max, false, &cumulative_min_constraints_right_to_left); + CumulativeExtremum(min, true, &cumulative_max_constraints_left_to_right); + CumulativeExtremum(min, false, &cumulative_max_constraints_right_to_left); if (REVERSE) { index_min_constraints_left_to_right = @@ -338,10 +338,10 @@ class LeafConstraintsBase { virtual void GoUpToFindConstrainingLeaves( int, int, - std::vector &, - std::vector &, - std::vector &, - FeatureMinOrMaxConstraints &, bool , + std::vector*, + std::vector*, + std::vector*, + FeatureMinOrMaxConstraints*, bool , uint32_t, uint32_t, uint32_t) {} virtual void RecomputeConstraintsIfNeeded( @@ -402,9 +402,9 @@ struct AdvancedConstraintEntry : ConstraintEntry { std::vector(); constraints_->GoUpToFindConstrainingLeaves( feature_for_constraint, leaf_idx, - features_of_splits_going_up_from_original_leaf, - thresholds_of_splits_going_up_from_original_leaf, - was_original_leaf_right_child_of_split, constraints_to_be_updated, + &features_of_splits_going_up_from_original_leaf, + &thresholds_of_splits_going_up_from_original_leaf, + &was_original_leaf_right_child_of_split, &constraints_to_be_updated, constraints[feature_for_constraint] .FeatureMinConstraintsToBeUpdated(), 0, it_end, it_end); @@ -864,7 +864,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // at any point in time, for an index i, the constraint constraint[i] has to // be valid on [threshold[i]: threshold[i + 1]) (or [threshold[i]: +inf) if i // is the last index of the array) - void UpdateConstraints(FeatureMinOrMaxConstraints &feature_constraint, + void UpdateConstraints(FeatureMinOrMaxConstraints* feature_constraint, double extremum, uint32_t it_start, uint32_t it_end, bool use_max_operator, uint32_t last_threshold) { bool start_done = false; @@ -878,23 +878,23 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // the previous constraint that still applies needs to be recorded double previous_constraint; double current_constraint; - for (unsigned int i = 0; i < feature_constraint.thresholds.size();) { - current_constraint = feature_constraint.constraints[i]; + for (unsigned int i = 0; i < feature_constraint->thresholds.size();) { + current_constraint = feature_constraint->constraints[i]; // easy case when the thresholds match - if (feature_constraint.thresholds[i] == it_start) { - feature_constraint.constraints[i] = + if (feature_constraint->thresholds[i] == it_start) { + feature_constraint->constraints[i] = (use_max_operator) - ? std::max(extremum, feature_constraint.constraints[i]) - : std::min(extremum, feature_constraint.constraints[i]); + ? std::max(extremum, feature_constraint->constraints[i]) + : std::min(extremum, feature_constraint->constraints[i]); start_done = true; } - if (feature_constraint.thresholds[i] > it_start) { + if (feature_constraint->thresholds[i] > it_start) { // existing constraint is updated if there is a need for it - if (feature_constraint.thresholds[i] < it_end) { - feature_constraint.constraints[i] = + if (feature_constraint->thresholds[i] < it_end) { + feature_constraint->constraints[i] = (use_max_operator) - ? std::max(extremum, feature_constraint.constraints[i]) - : std::min(extremum, feature_constraint.constraints[i]); + ? std::max(extremum, feature_constraint->constraints[i]) + : std::min(extremum, feature_constraint->constraints[i]); } // when thresholds don't match, a new threshold // and a new constraint may need to be inserted @@ -902,29 +902,29 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { start_done = true; if ((use_max_operator && extremum > previous_constraint) || (!use_max_operator && extremum < previous_constraint)) { - feature_constraint.constraints.insert( - feature_constraint.constraints.begin() + i, extremum); - feature_constraint.thresholds.insert( - feature_constraint.thresholds.begin() + i, it_start); + feature_constraint->constraints.insert( + feature_constraint->constraints.begin() + i, extremum); + feature_constraint->thresholds.insert( + feature_constraint->thresholds.begin() + i, it_start); i += 1; } } } // easy case when the end thresholds match - if (feature_constraint.thresholds[i] == it_end) { + if (feature_constraint->thresholds[i] == it_end) { end_done = true; i += 1; break; } // if they don't then, the previous constraint needs to be added back // where the current one ends - if (feature_constraint.thresholds[i] > it_end) { + if (feature_constraint->thresholds[i] > it_end) { if (i != 0 && - previous_constraint != feature_constraint.constraints[i - 1]) { - feature_constraint.constraints.insert( - feature_constraint.constraints.begin() + i, previous_constraint); - feature_constraint.thresholds.insert( - feature_constraint.thresholds.begin() + i, it_end); + previous_constraint != feature_constraint->constraints[i - 1]) { + feature_constraint->constraints.insert( + feature_constraint->constraints.begin() + i, previous_constraint); + feature_constraint->thresholds.insert( + feature_constraint->thresholds.begin() + i, it_end); } end_done = true; i += 1; @@ -932,12 +932,12 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { } // If 2 successive constraints are the same then the second one may as // well be deleted - if (i != 0 && feature_constraint.constraints[i] == - feature_constraint.constraints[i - 1]) { - feature_constraint.constraints.erase( - feature_constraint.constraints.begin() + i); - feature_constraint.thresholds.erase( - feature_constraint.thresholds.begin() + i); + if (i != 0 && feature_constraint->constraints[i] == + feature_constraint->constraints[i - 1]) { + feature_constraint->constraints.erase( + feature_constraint->constraints.begin() + i); + feature_constraint->thresholds.erase( + feature_constraint->thresholds.begin() + i); previous_constraint = current_constraint; i -= 1; } @@ -948,11 +948,11 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // added at the end if (!start_done) { if ((use_max_operator && - extremum > feature_constraint.constraints.back()) || + extremum > feature_constraint->constraints.back()) || (!use_max_operator && - extremum < feature_constraint.constraints.back())) { - feature_constraint.constraints.push_back(extremum); - feature_constraint.thresholds.push_back(it_start); + extremum < feature_constraint->constraints.back())) { + feature_constraint->constraints.push_back(extremum); + feature_constraint->thresholds.push_back(it_start); } else { end_done = true; } @@ -960,9 +960,9 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // if we didn't get to an index after it_end, then the previous constraint // needs to be set back, unless it_end goes up to the last bin of the feature if (!end_done && it_end != last_threshold && - previous_constraint != feature_constraint.constraints.back()) { - feature_constraint.constraints.push_back(previous_constraint); - feature_constraint.thresholds.push_back(it_end); + previous_constraint != feature_constraint->constraints.back()) { + feature_constraint->constraints.push_back(previous_constraint); + feature_constraint->thresholds.push_back(it_end); } } @@ -1003,7 +1003,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { const std::vector & thresholds_of_splits_going_up_from_original_leaf, const std::vector &was_original_leaf_right_child_of_split, - FeatureMinOrMaxConstraints &feature_constraint, uint32_t last_threshold) { + FeatureMinOrMaxConstraints* feature_constraint, uint32_t last_threshold) { double extremum; // if leaf, then constraints need to be updated according to its value if (node_idx < 0) { @@ -1078,10 +1078,10 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // are constraining the current leaf void GoUpToFindConstrainingLeaves( int feature_for_constraint, int node_idx, - std::vector &features_of_splits_going_up_from_original_leaf, - std::vector &thresholds_of_splits_going_up_from_original_leaf, - std::vector &was_original_leaf_right_child_of_split, - FeatureMinOrMaxConstraints &feature_constraint, + std::vector* features_of_splits_going_up_from_original_leaf, + std::vector* thresholds_of_splits_going_up_from_original_leaf, + std::vector* was_original_leaf_right_child_of_split, + FeatureMinOrMaxConstraints* feature_constraint, bool min_constraints_to_be_updated, uint32_t it_start, uint32_t it_end, uint32_t last_threshold) final { int parent_idx = @@ -1114,8 +1114,8 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { bool opposite_child_necessary_to_update_constraints = OppositeChildShouldBeUpdated( is_split_numerical, - features_of_splits_going_up_from_original_leaf, inner_feature, - was_original_leaf_right_child_of_split, is_in_right_child); + *features_of_splits_going_up_from_original_leaf, inner_feature, + *was_original_leaf_right_child_of_split, is_in_right_child); if (opposite_child_necessary_to_update_constraints) { // if there is no monotone constraint on a split, @@ -1139,9 +1139,9 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { GoDownToFindConstrainingLeaves( feature_for_constraint, inner_feature, opposite_child_idx, min_constraints_to_be_updated, it_start, it_end, - features_of_splits_going_up_from_original_leaf, - thresholds_of_splits_going_up_from_original_leaf, - was_original_leaf_right_child_of_split, feature_constraint, + *features_of_splits_going_up_from_original_leaf, + *thresholds_of_splits_going_up_from_original_leaf, + *was_original_leaf_right_child_of_split, feature_constraint, last_threshold); } } @@ -1150,9 +1150,9 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // i.e. that it will be helpful going down to determine which leaf // is actually contiguous to the original leaf and constraining // so the variables associated with the split need to be recorded - was_original_leaf_right_child_of_split.push_back(is_in_right_child); - thresholds_of_splits_going_up_from_original_leaf.push_back(threshold); - features_of_splits_going_up_from_original_leaf.push_back(inner_feature); + was_original_leaf_right_child_of_split->push_back(is_in_right_child); + thresholds_of_splits_going_up_from_original_leaf->push_back(threshold); + features_of_splits_going_up_from_original_leaf->push_back(inner_feature); } // since current node is not the root, keep going up From 6a5d2ed484e7aa88bbfdac7878be08ac6e76c4d8 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 23 Aug 2020 14:26:49 +0100 Subject: [PATCH 17/42] Forgotten reference. --- src/treelearner/feature_histogram.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 954086459bb2..261ec08e1464 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -764,7 +764,7 @@ class FeatureHistogram { template static double CalculateSplittedLeafOutput( double sum_gradients, double sum_hessians, double l1, double l2, - double max_delta_step, const BasicConstraint constraints, + double max_delta_step, const BasicConstraint& constraints, double smoothing, data_size_t num_data, double parent_output) { double ret = CalculateSplittedLeafOutput( sum_gradients, sum_hessians, l1, l2, max_delta_step, smoothing, num_data, parent_output); From e78a5bc64d7425d15acf31f4d2766fc745bf2cdb Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 23 Aug 2020 14:42:43 +0100 Subject: [PATCH 18/42] Leverage USE_MC for efficiency. --- src/treelearner/feature_histogram.hpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 261ec08e1464..e0a80a9d4ffe 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -288,7 +288,9 @@ class FeatureHistogram { double best_sum_left_gradient = 0; double best_sum_left_hessian = 0; double gain_shift; - constraints->InitCumulativeConstraints(true); + if (USE_MC) { + constraints->InitCumulativeConstraints(true); + } if (USE_SMOOTHING) { gain_shift = GetLeafGainGivenOutput( sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, parent_output); @@ -866,12 +868,15 @@ class FeatureHistogram { data_size_t best_left_count = 0; uint32_t best_threshold = static_cast(meta_->num_bin); const double cnt_factor = num_data / sum_hessian; + BasicConstraint best_right_constraints; BasicConstraint best_left_constraints; - bool constraint_update_necessary = - constraints->ConstraintDifferentDependingOnThreshold(); - constraints->InitCumulativeConstraints(REVERSE); + USE_MC && constraints->ConstraintDifferentDependingOnThreshold(); + + if (USE_MC) { + constraints->InitCumulativeConstraints(REVERSE); + } if (REVERSE) { double sum_right_gradient = 0.0f; @@ -920,7 +925,7 @@ class FeatureHistogram { } } - if (constraint_update_necessary) { + if (USE_MC && constraint_update_necessary) { constraints->Update(t + offset); } @@ -946,8 +951,10 @@ class FeatureHistogram { // left is <= threshold, right is > threshold. so this is t-1 best_threshold = static_cast(t - 1 + offset); best_gain = current_gain; - best_right_constraints = (constraints->RightToBasicConstraint()); - best_left_constraints = (constraints->LeftToBasicConstraint()); + if (USE_MC) { + best_right_constraints = (constraints->RightToBasicConstraint()); + best_left_constraints = (constraints->LeftToBasicConstraint()); + } } } } else { @@ -1032,8 +1039,10 @@ class FeatureHistogram { best_sum_left_hessian = sum_left_hessian; best_threshold = static_cast(t + offset); best_gain = current_gain; - best_right_constraints = (constraints->RightToBasicConstraint()); - best_left_constraints = (constraints->LeftToBasicConstraint()); + if (USE_MC) { + best_right_constraints = (constraints->RightToBasicConstraint()); + best_left_constraints = (constraints->LeftToBasicConstraint()); + } } } } From 68013220ac211b1771d1f59350eb45f436740791 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 23 Aug 2020 22:55:21 +0100 Subject: [PATCH 19/42] Make constraints const again in feature_histogram.hpp. --- src/treelearner/feature_histogram.hpp | 13 ++++++------- src/treelearner/monotone_constraints.hpp | 10 +++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index e0a80a9d4ffe..4ee0d10cbf0b 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -84,7 +84,7 @@ class FeatureHistogram { void FindBestThreshold(double sum_gradient, double sum_hessian, data_size_t num_data, - FeatureConstraint* constraints, + const FeatureConstraint* constraints, double parent_output, SplitInfo* output) { output->default_left = true; @@ -158,7 +158,7 @@ class FeatureHistogram { #define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING #define LAMBDA_ARGUMENTS \ double sum_gradient, double sum_hessian, data_size_t num_data, \ - FeatureConstraint* constraints, double parent_output, SplitInfo *output + const FeatureConstraint* constraints, double parent_output, SplitInfo *output #define BEFORE_ARGUMENTS sum_gradient, sum_hessian, parent_output, num_data, output, &rand_threshold #define FUNC_ARGUMENTS \ sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, \ @@ -278,7 +278,7 @@ class FeatureHistogram { void FindBestThresholdCategoricalInner(double sum_gradient, double sum_hessian, data_size_t num_data, - FeatureConstraint* constraints, + const FeatureConstraint* constraints, double parent_output, SplitInfo* output) { is_splittable_ = false; @@ -787,7 +787,7 @@ class FeatureHistogram { double sum_right_gradients, double sum_right_hessians, double l1, double l2, double max_delta_step, - FeatureConstraint* constraints, + const FeatureConstraint* constraints, int8_t monotone_constraint, double smoothing, data_size_t left_count, @@ -853,12 +853,11 @@ class FeatureHistogram { } } - // FIXME Make constraints const again and have the cumulative constraints outside of it? template void FindBestThresholdSequentially(double sum_gradient, double sum_hessian, data_size_t num_data, - FeatureConstraint* constraints, + const FeatureConstraint* constraints, double min_gain_shift, SplitInfo* output, int rand_threshold, double parent_output) { const int8_t offset = meta_->offset; @@ -1080,7 +1079,7 @@ class FeatureHistogram { hist_t* data_; bool is_splittable_ = true; - std::function find_best_threshold_fun_; }; diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index e1588f965ebc..22abf3e33a94 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -28,8 +28,8 @@ struct BasicConstraint { }; struct FeatureConstraint { - virtual void InitCumulativeConstraints(bool) {} - virtual void Update(int) {} + virtual void InitCumulativeConstraints(bool) const {} + virtual void Update(int) const {} virtual BasicConstraint LeftToBasicConstraint() const = 0; virtual BasicConstraint RightToBasicConstraint() const = 0; virtual bool ConstraintDifferentDependingOnThreshold() const = 0; @@ -258,16 +258,16 @@ struct CumulativeFeatureConstraint { struct AdvancedFeatureConstraints : FeatureConstraint { FeatureMinOrMaxConstraints min_constraints; FeatureMinOrMaxConstraints max_constraints; - CumulativeFeatureConstraint cumulative_feature_constraint; + mutable CumulativeFeatureConstraint cumulative_feature_constraint; bool min_constraints_to_be_recomputed = false; bool max_constraints_to_be_recomputed = false; - void InitCumulativeConstraints(bool REVERSE) final { + void InitCumulativeConstraints(bool REVERSE) const final { cumulative_feature_constraint = CumulativeFeatureConstraint(min_constraints, max_constraints, REVERSE); } - void Update(int threshold) final { + void Update(int threshold) const final { cumulative_feature_constraint.Update(threshold); } From 7fc04cf4911cc7b280ecbf25fce1f05191d5b27c Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Mon, 24 Aug 2020 21:14:44 +0100 Subject: [PATCH 20/42] Update docs. --- docs/Parameters.rst | 2 ++ include/LightGBM/config.h | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index dcd1353e1525..8b5cd13d88ed 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -472,6 +472,8 @@ Learning Control Parameters - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + - ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results + - ``monotone_penalty`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0`` - used only if ``monotone_constraints`` is set diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 5e1902613905..03996ed3cae0 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -448,6 +448,7 @@ struct Config { // desc = monotone constraints method // descl2 = ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + // descl2 = ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results std::string monotone_constraints_method = "basic"; // alias = monotone_splits_penalty, ms_penalty, mc_penalty From 7f1c05a7db1a40499afdbf5d3e48c19e32e70197 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sun, 30 Aug 2020 18:49:09 +0100 Subject: [PATCH 21/42] Add "advanced" to the monotone constraints options. --- docs/Parameters.rst | 2 +- include/LightGBM/config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 8b5cd13d88ed..ea2db3524cbd 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -462,7 +462,7 @@ Learning Control Parameters - you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature -- ``monotone_constraints_method`` :raw-html:`🔗︎`, default = ``basic``, type = enum, options: ``basic``, ``intermediate``, aliases: ``monotone_constraining_method``, ``mc_method`` +- ``monotone_constraints_method`` :raw-html:`🔗︎`, default = ``basic``, type = enum, options: ``basic``, ``intermediate``, ``advanced``, aliases: ``monotone_constraining_method``, ``mc_method`` - used only if ``monotone_constraints`` is set diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 03996ed3cae0..3bbe39ca8ace 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -443,7 +443,7 @@ struct Config { // type = enum // alias = monotone_constraining_method, mc_method - // options = basic, intermediate + // options = basic, intermediate, advanced // desc = used only if ``monotone_constraints`` is set // desc = monotone constraints method // descl2 = ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions From 24290e01081c9c3e2d542a503cdaa08684b7c793 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:11:37 +0100 Subject: [PATCH 22/42] Update monotone constraints restrictions. --- src/io/config.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/config.cpp b/src/io/config.cpp index 6878896deb58..9b3327af7eb6 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -347,13 +347,13 @@ void Config::CheckParamConflict() { } if (is_parallel && monotone_constraints_method == std::string("intermediate")) { // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints. - Log::Warning("Cannot use \"intermediate\" monotone constraints in parallel learning, auto set to \"basic\" method."); + Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in parallel learning, auto set to \"basic\" method."); monotone_constraints_method = "basic"; } if (feature_fraction_bynode != 1.0 && monotone_constraints_method == std::string("intermediate")) { // "intermediate" monotone constraints need to recompute splits. If the features are sampled when computing the // split initially, then the sampling needs to be recorded or done once again, which is currently not supported - Log::Warning("Cannot use \"intermediate\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method."); + Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method."); monotone_constraints_method = "basic"; } if (max_depth > 0 && monotone_penalty >= max_depth) { From 56bc0dac3304ad4ff8f0192970392566e890d1b1 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:16:50 +0100 Subject: [PATCH 23/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 22abf3e33a94..a2fafb805b92 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -123,7 +123,7 @@ struct FeatureMinOrMaxConstraints { } void UpdateMin(double min) { - for (unsigned int j = 0; j < constraints.size(); j++) { + for (size_t j = 0; j < constraints.size(); ++j) { if (min > constraints[j]) { constraints[j] = min; } From e47148fc2eda53f33d370a32dce0d57c4109eb71 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:17:14 +0100 Subject: [PATCH 24/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index a2fafb805b92..534ca07795d9 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -131,7 +131,7 @@ struct FeatureMinOrMaxConstraints { } void UpdateMax(double max) { - for (unsigned int j = 0; j < constraints.size(); j++) { + for (size_t j = 0; j < constraints.size(); ++j) { if (max < constraints[j]) { constraints[j] = max; } From bea1edd827e573314c6f2b5c086dc3fa6bc5090a Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:17:55 +0100 Subject: [PATCH 25/42] Remove superfluous parenthesis. --- src/treelearner/feature_histogram.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 4ee0d10cbf0b..5d7595c35c3b 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -951,8 +951,8 @@ class FeatureHistogram { best_threshold = static_cast(t - 1 + offset); best_gain = current_gain; if (USE_MC) { - best_right_constraints = (constraints->RightToBasicConstraint()); - best_left_constraints = (constraints->LeftToBasicConstraint()); + best_right_constraints = constraints->RightToBasicConstraint(); + best_left_constraints = constraints->LeftToBasicConstraint(); } } } @@ -1039,8 +1039,8 @@ class FeatureHistogram { best_threshold = static_cast(t + offset); best_gain = current_gain; if (USE_MC) { - best_right_constraints = (constraints->RightToBasicConstraint()); - best_left_constraints = (constraints->LeftToBasicConstraint()); + best_right_constraints = constraints->RightToBasicConstraint(); + best_left_constraints = constraints->LeftToBasicConstraint(); } } } From 8cf7aa85b4bc7424c4ff1e08e02b95861fb85a0d Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:19:04 +0100 Subject: [PATCH 26/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 534ca07795d9..175d0e824c37 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -461,7 +461,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { class BasicLeafConstraints : public LeafConstraintsBase { public: explicit BasicLeafConstraints(int num_leaves) : num_leaves_(num_leaves) { - for (int i = 0; i < num_leaves; i++) { + for (int i = 0; i < num_leaves; ++i) { entries_.push_back(new BasicConstraintEntry()); } } From 81226b8d62bf060211b99e3440c10017716d9116 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:19:28 +0100 Subject: [PATCH 27/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 175d0e824c37..bc790801c57f 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -878,7 +878,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // the previous constraint that still applies needs to be recorded double previous_constraint; double current_constraint; - for (unsigned int i = 0; i < feature_constraint->thresholds.size();) { + for (size_t i = 0; i < feature_constraint->thresholds.size();) { current_constraint = feature_constraint->constraints[i]; // easy case when the thresholds match if (feature_constraint->thresholds[i] == it_start) { From 6b66558972df819d09eae5c4a718f16c2983ca06 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:19:49 +0100 Subject: [PATCH 28/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index bc790801c57f..b3ce00e9b8bb 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -856,7 +856,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { AdvancedLeafConstraints(const Config *config, int num_leaves, int num_features) : IntermediateLeafConstraints(config, num_leaves) { - for (int i = 0; i < num_leaves; i++) { + for (int i = 0; i < num_leaves; ++i) { entries_[i] = new AdvancedConstraintEntry(num_features); } } From 250dfe742907200536baba9083ac18297599c93a Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:20:12 +0100 Subject: [PATCH 29/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index b3ce00e9b8bb..f8967bde0cb8 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -418,7 +418,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } void Reset() final { - for (unsigned int i = 0; i < constraints.size(); i++) { + for (size_t i = 0; i < constraints.size(); ++i) { constraints[i].Reset(); } } From 73d975210b22e844ca874c677659eab70c6cdb30 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:20:34 +0100 Subject: [PATCH 30/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index f8967bde0cb8..4487fb3aa3ff 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -424,7 +424,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } void UpdateMin(double new_min) final { - for (unsigned int i = 0; i < constraints.size(); i++) { + for (size_t i = 0; i < constraints.size(); ++i) { constraints[i].UpdateMin(new_min, false); } } From afa744f532a63ae06308ce17e7d0bb5de9d7a939 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:21:59 +0100 Subject: [PATCH 31/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 4487fb3aa3ff..1e30254d87ab 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -430,7 +430,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } void UpdateMax(double new_max) final { - for (unsigned int i = 0; i < constraints.size(); i++) { + for (size_t i = 0; i < constraints.size(); ++i) { constraints[i].UpdateMax(new_max, false); } } From 7e9987b8e5b55d3dad04e77e57960afb2126d2fc Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:22:46 +0100 Subject: [PATCH 32/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 1e30254d87ab..daf713b77372 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -436,7 +436,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } bool UpdateMinAndReturnBoolIfChanged(double new_min) final { - for (unsigned int i = 0; i < constraints.size(); i++) { + for (size_t i = 0; i < constraints.size(); ++i) { constraints[i].UpdateMin(new_min, true); } // even if nothing changed, this could have been unconstrained so it needs From 9da9d09d512c94765af34e4019e263b6fef94899 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Sat, 12 Sep 2020 19:23:08 +0100 Subject: [PATCH 33/42] Fix loop iterator. Co-authored-by: Nikita Titov --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index daf713b77372..a4502c59fffa 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -445,7 +445,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } bool UpdateMaxAndReturnBoolIfChanged(double new_max) final { - for (unsigned int i = 0; i < constraints.size(); i++) { + for (size_t i = 0; i < constraints.size(); ++i) { constraints[i].UpdateMax(new_max, true); } // even if nothing changed, this could have been unconstrained so it needs From 184c4ef8a7be147f8694d71cf15482dd09aaa4da Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:24:47 +0100 Subject: [PATCH 34/42] Remove std namespace qualifier. --- src/treelearner/monotone_constraints.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index a4502c59fffa..7e0517b16ded 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -163,10 +163,10 @@ struct CumulativeFeatureConstraint { CHECK_NE(cumulative_extremum->size(), 0); #endif - std::size_t n_exts = cumulative_extremum->size(); + size_t n_exts = cumulative_extremum->size(); int step = is_direction_from_left_to_right ? 1 : -1; - std::size_t start = is_direction_from_left_to_right ? 0 : n_exts - 1; - std::size_t end = is_direction_from_left_to_right ? n_exts - 1 : 0; + size_t start = is_direction_from_left_to_right ? 0 : n_exts - 1; + size_t end = is_direction_from_left_to_right ? n_exts - 1 : 0; for (auto i = start; i != end; i = i + step) { (*cumulative_extremum)[i + step] = extremum_function( From e9f695378fc486095bf7d9a026ad6ff1417746cc Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:29:29 +0100 Subject: [PATCH 35/42] Fix unsigned_int size_t comparison. --- src/treelearner/monotone_constraints.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 7e0517b16ded..cde206bcb411 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -146,10 +146,10 @@ struct CumulativeFeatureConstraint { std::vector cumulative_min_constraints_right_to_left; std::vector cumulative_max_constraints_left_to_right; std::vector cumulative_max_constraints_right_to_left; - unsigned int index_min_constraints_left_to_right; - unsigned int index_min_constraints_right_to_left; - unsigned int index_max_constraints_left_to_right; - unsigned int index_max_constraints_right_to_left; + size_t index_min_constraints_left_to_right; + size_t index_min_constraints_right_to_left; + size_t index_max_constraints_left_to_right; + size_t index_max_constraints_right_to_left; static void CumulativeExtremum( const double &(*extremum_function)(const double &, const double &), From 1b38dc4621b7f9e77aa55194a82b5247cff538c3 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:30:52 +0100 Subject: [PATCH 36/42] Set num_features as int for consistency with the rest of the codebase. --- src/treelearner/monotone_constraints.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index cde206bcb411..3bccc0411958 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -413,7 +413,7 @@ struct AdvancedConstraintEntry : ConstraintEntry { } // for each feature, an array of constraints needs to be stored - explicit AdvancedConstraintEntry(unsigned int num_features) { + explicit AdvancedConstraintEntry(int num_features) { constraints.resize(num_features); } From 21f32d29de3052a4504aaf00b7a9ccb1f349318f Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 19:45:00 +0100 Subject: [PATCH 37/42] Make sure constraints exist before recomputing them. --- src/treelearner/serial_tree_learner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 0853d32380dc..c2f20fb1a8db 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -714,7 +714,7 @@ void SerialTreeLearner::ComputeBestSplitForFeature( bool is_feature_numerical = train_data_->FeatureBinMapper(feature_index) ->bin_type() == BinType::NumericalBin; - if (is_feature_numerical) { + if (is_feature_numerical & !config_->monotone_constraints.empty()) { constraints_->RecomputeConstraintsIfNeeded( constraints_.get(), feature_index, ~(leaf_splits->leaf_index()), train_data_->FeatureNumBin(feature_index)); From 609f78ae1fcb84e490f2a673a5be848ce579386e Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Sat, 12 Sep 2020 20:22:09 +0100 Subject: [PATCH 38/42] Initialize previous constraints in UpdateConstraints. --- src/treelearner/monotone_constraints.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index 3bccc0411958..ea2701f63a0d 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -876,7 +876,9 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { // [0, 1, 2] and [cstr1, cstr2, cstr1] // so since we loop through thresholds only once, // the previous constraint that still applies needs to be recorded - double previous_constraint; + double previous_constraint = use_max_operator + ? -std::numeric_limits::max() + : std::numeric_limits::max(); double current_constraint; for (size_t i = 0; i < feature_constraint->thresholds.size();) { current_constraint = feature_constraint->constraints[i]; From f554a249734d68e7aad9ae970a73ebdff5f60cfc Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Mon, 14 Sep 2020 11:24:39 +0100 Subject: [PATCH 39/42] Update monotone constraints restrictions. --- src/io/config.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/config.cpp b/src/io/config.cpp index 9b3327af7eb6..04f05c175799 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -345,12 +345,12 @@ void Config::CheckParamConflict() { min_data_in_leaf = 2; Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active."); } - if (is_parallel && monotone_constraints_method == std::string("intermediate")) { + if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("intermediate"))) { // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints. Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in parallel learning, auto set to \"basic\" method."); monotone_constraints_method = "basic"; } - if (feature_fraction_bynode != 1.0 && monotone_constraints_method == std::string("intermediate")) { + if (feature_fraction_bynode != 1.0 && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) { // "intermediate" monotone constraints need to recompute splits. If the features are sampled when computing the // split initially, then the sampling needs to be recorded or done once again, which is currently not supported Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method."); From 6b3d73d60cd8e06411ba1249c8f474d8d778b5b9 Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Mon, 14 Sep 2020 11:38:24 +0100 Subject: [PATCH 40/42] Refactor UpdateConstraints loop. --- src/treelearner/monotone_constraints.hpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp index ea2701f63a0d..fde4bfbf033d 100644 --- a/src/treelearner/monotone_constraints.hpp +++ b/src/treelearner/monotone_constraints.hpp @@ -880,7 +880,7 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { ? -std::numeric_limits::max() : std::numeric_limits::max(); double current_constraint; - for (size_t i = 0; i < feature_constraint->thresholds.size();) { + for (size_t i = 0; i < feature_constraint->thresholds.size(); ++i) { current_constraint = feature_constraint->constraints[i]; // easy case when the thresholds match if (feature_constraint->thresholds[i] == it_start) { @@ -908,14 +908,13 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { feature_constraint->constraints.begin() + i, extremum); feature_constraint->thresholds.insert( feature_constraint->thresholds.begin() + i, it_start); - i += 1; + ++i; } } } // easy case when the end thresholds match if (feature_constraint->thresholds[i] == it_end) { end_done = true; - i += 1; break; } // if they don't then, the previous constraint needs to be added back @@ -929,7 +928,6 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { feature_constraint->thresholds.begin() + i, it_end); } end_done = true; - i += 1; break; } // If 2 successive constraints are the same then the second one may as @@ -941,10 +939,9 @@ class AdvancedLeafConstraints : public IntermediateLeafConstraints { feature_constraint->thresholds.erase( feature_constraint->thresholds.begin() + i); previous_constraint = current_constraint; - i -= 1; + --i; } previous_constraint = current_constraint; - i += 1; } // if the loop didn't get to an index greater than it_start, it needs to be // added at the end From 5774cf45797add6cfd9f18110808db2188dd1218 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Mon, 14 Sep 2020 18:26:16 +0100 Subject: [PATCH 41/42] Update src/io/config.cpp Co-authored-by: Nikita Titov --- src/io/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/config.cpp b/src/io/config.cpp index 04f05c175799..a3f7536267a3 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -345,7 +345,7 @@ void Config::CheckParamConflict() { min_data_in_leaf = 2; Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active."); } - if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("intermediate"))) { + if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) { // In distributed mode, local node doesn't have histograms on all features, cannot perform "intermediate" monotone constraints. Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in parallel learning, auto set to \"basic\" method."); monotone_constraints_method = "basic"; From 6ec24f4860d9e581c1e08b06f0dde38d7bc7fdfa Mon Sep 17 00:00:00 2001 From: charlesAuguste Date: Mon, 21 Sep 2020 21:46:36 +0100 Subject: [PATCH 42/42] Delete white spaces. --- src/treelearner/serial_tree_learner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index c2f20fb1a8db..5426c394d91e 100644 --- a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -719,7 +719,7 @@ void SerialTreeLearner::ComputeBestSplitForFeature( constraints_.get(), feature_index, ~(leaf_splits->leaf_index()), train_data_->FeatureNumBin(feature_index)); } - + SplitInfo new_split; double parent_output; if (leaf_splits->leaf_index() == 0) {