From 9ed04a336b9839366c93b66f050ad52917ed0b68 Mon Sep 17 00:00:00 2001 From: btrotta Date: Tue, 20 Aug 2019 21:26:06 +1000 Subject: [PATCH] Change binning behavior to be same as PR #2342. --- src/io/bin.cpp | 14 +++++++++----- tests/python_package_test/test_engine.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/io/bin.cpp b/src/io/bin.cpp index b26a6a461e3e..40da30c6ad2d 100644 --- a/src/io/bin.cpp +++ b/src/io/bin.cpp @@ -186,7 +186,7 @@ namespace LightGBM { } } - // include zero bounds if possible + // include zero bounds and infinity bound if (max_bin == 2) { if (left_cnt == 0) { bin_upper_bound.push_back(kZeroThreshold); @@ -194,9 +194,14 @@ namespace LightGBM { bin_upper_bound.push_back(-kZeroThreshold); } } else if (max_bin >= 3) { - bin_upper_bound.push_back(-kZeroThreshold); - bin_upper_bound.push_back(kZeroThreshold); + if (left_cnt > 0) { + bin_upper_bound.push_back(-kZeroThreshold); + } + if (right_start >= 0) { + bin_upper_bound.push_back(kZeroThreshold); + } } + bin_upper_bound.push_back(std::numeric_limits<double>::infinity()); // add forced bounds, excluding zeros since we have already added zero bounds int i = 0; @@ -207,7 +212,6 @@ namespace LightGBM { ++i; } } - bin_upper_bound.push_back(std::numeric_limits<double>::infinity()); int max_to_insert = max_bin - static_cast<int>(bin_upper_bound.size()); int num_to_insert = std::min(max_to_insert, static_cast<int>(forced_upper_bounds.size())); if (num_to_insert > 0) { @@ -239,7 +243,7 @@ namespace LightGBM { } bin_upper_bound.insert(bin_upper_bound.end(), bounds_to_add.begin(), bounds_to_add.end()); std::stable_sort(bin_upper_bound.begin(), bin_upper_bound.end()); - CHECK(bin_upper_bound.size() <= max_bin); + CHECK(bin_upper_bound.size() <= static_cast<size_t>(max_bin)); return bin_upper_bound; } diff --git a/tests/python_package_test/test_engine.py 
b/tests/python_package_test/test_engine.py @@ -915,7 +915,7 @@ def test_max_bin_by_feature(self): } lgb_data = lgb.Dataset(X, label=y) est = lgb.train(params, lgb_data, num_boost_round=1) - self.assertEqual(len(np.unique(est.predict(X))), 99) + self.assertEqual(len(np.unique(est.predict(X))), 100) params['max_bin_by_feature'] = [2, 100] lgb_data = lgb.Dataset(X, label=y) est = lgb.train(params, lgb_data, num_boost_round=1)