
Commit

Fix style issues.
btrotta committed Aug 14, 2019
1 parent 34e72c8 commit 5b21573
Showing 8 changed files with 34 additions and 25 deletions.
4 changes: 2 additions & 2 deletions docs/Parameters.rst
@@ -408,9 +408,9 @@ Learning Control Parameters

- path to a ``.json`` file that specifies bin upper bounds for some or all features

- ``.json`` file should contain an array of objects, each containing the name ``feature`` (integer feature number) and ``bin_upper_bounds`` (array of thresolds for binning)
- ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bounds`` (array of thresholds for binning)

- see `this file <https://github.com/microsoft/LightGBM/tree/master/tests/data/forced_bins.json>`__ as an example
- see `this file <https://github.com/microsoft/LightGBM/tree/master/examples/regression/forced_bins.json>`__ as an example

- ``refit_decay_rate`` :raw-html:`<a id="refit_decay_rate" title="Permalink to this parameter" href="#refit_decay_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.9``, type = double, constraints: ``0.0 <= refit_decay_rate <= 1.0``

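A minimal usage sketch of the new parameter from the Python package (illustrative only: the data, thresholds, and file name are invented here, and the parameter is passed among the training parameters the same way test_forced_bins does further down this page; note that the sample file and the parsing code use the key "bin_upper_bound"):

    import json
    import numpy as np
    import lightgbm as lgb

    # Hypothetical forced bins: feature 0 gets mandatory bin upper bounds at 0.3 and 0.6.
    with open("my_forced_bins.json", "w") as f:
        json.dump([{"feature": 0, "bin_upper_bound": [0.3, 0.6]}], f)

    X = np.random.rand(100, 2)
    y = np.random.rand(100)
    params = {"objective": "regression",
              "max_bin": 16,
              "forcedbins_filename": "my_forced_bins.json"}
    booster = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=5)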
2 changes: 1 addition & 1 deletion examples/regression/forced_bins.json
@@ -7,4 +7,4 @@
"feature": 1,
"bin_upper_bound": [ -0.1, -0.15, -0.2 ]
}
]
]
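The hunk above is the tail of the example file; to see what it forces (feature 1's bin upper bounds), one can load it directly (path assumed relative to a repository checkout):

    import json

    # Print the forced upper bounds for each feature listed in the example file.
    with open("examples/regression/forced_bins.json") as f:
        for entry in json.load(f):
            print("feature", entry["feature"], "->", entry["bin_upper_bound"])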
3 changes: 3 additions & 0 deletions examples/regression/train.conf
@@ -29,6 +29,9 @@ is_training_metric = true
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255

# forced bin thresholds
# forcedbins_filename = forced_bins.json

# training data
# if exsting weight file, should name to "regression.train.weight"
# alias: train_data, train
4 changes: 2 additions & 2 deletions include/LightGBM/config.h
@@ -403,8 +403,8 @@ struct Config {
std::string forcedsplits_filename = "";

// desc = path to a ``.json`` file that specifies bin upper bounds for some or all features
// desc = ``.json`` file should contain an array of objects, each containing the name ``feature`` (integer feature number) and ``bin_upper_bounds`` (array of thresolds for binning)
// desc = see `this file <https://github.com/microsoft/LightGBM/tree/master/tests/data/forced_bins.json>`__ as an example
// desc = ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bounds`` (array of thresholds for binning)
// desc = see `this file <https://github.com/microsoft/LightGBM/tree/master/examples/regression/forced_bins.json>`__ as an example
std::string forcedbins_filename = "";

// check = >=0.0
2 changes: 1 addition & 1 deletion src/io/bin.cpp
@@ -320,7 +320,7 @@ namespace LightGBM {
}
} else if (missing_type_ == MissingType::None) {
bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt,
min_data_in_bin, forced_upper_bounds);
min_data_in_bin, forced_upper_bounds);
} else {
bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin - 1, total_sample_cnt - na_cnt,
min_data_in_bin, forced_upper_bounds);
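Both calls thread forced_upper_bounds through to the bin finder so the user-specified upper bounds end up among the feature's bin boundaries. As a rough conceptual illustration (Python, not the actual C++ routine) of how a vector of bin upper bounds maps values to bins:

    import numpy as np

    # A value falls into the first bin whose upper bound is >= the value
    # (a simplified view of what bin_upper_bound_ encodes).
    bin_upper_bound = np.array([0.3, 0.6, 1.0])
    values = np.array([0.05, 0.3, 0.45, 0.99])
    print(np.searchsorted(bin_upper_bound, values, side="left"))  # [0 0 1 2]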
40 changes: 22 additions & 18 deletions src/io/dataset.cpp
@@ -5,10 +5,10 @@
#include <LightGBM/dataset.h>

#include <LightGBM/feature_group.h>
#include <LightGBM/json11.hpp>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/threading.h>
#include <LightGBM/json11.hpp>

#include <limits>
#include <chrono>
@@ -1071,24 +1071,28 @@ std::vector<std::vector<double>> Dataset::GetForcedBins(std::string forced_bins_
std::vector<std::vector<double>> forced_bins(num_total_features, std::vector<double>());
if (forced_bins_path != "") {
std::ifstream forced_bins_stream(forced_bins_path.c_str());
std::stringstream buffer;
buffer << forced_bins_stream.rdbuf();
std::string err;
Json forced_bins_json = Json::parse(buffer.str(), err);
CHECK(forced_bins_json.is_array());
std::vector<Json> forced_bins_arr = forced_bins_json.array_items();
for (int i = 0; i < forced_bins_arr.size(); ++i) {
int feature_num = forced_bins_arr[i]["feature"].int_value();
CHECK(feature_num < num_total_features);
std::vector<Json> bounds_arr = forced_bins_arr[i]["bin_upper_bound"].array_items();
for (int j = 0; j < bounds_arr.size(); ++j) {
forced_bins[feature_num].push_back(bounds_arr[j].number_value());
if (forced_bins_stream.fail()) {
Log::Warning("Could not open %s. Will ignore.", forced_bins_path.c_str());
} else {
std::stringstream buffer;
buffer << forced_bins_stream.rdbuf();
std::string err;
Json forced_bins_json = Json::parse(buffer.str(), err);
CHECK(forced_bins_json.is_array());
std::vector<Json> forced_bins_arr = forced_bins_json.array_items();
for (int i = 0; i < forced_bins_arr.size(); ++i) {
int feature_num = forced_bins_arr[i]["feature"].int_value();
CHECK(feature_num < num_total_features);
std::vector<Json> bounds_arr = forced_bins_arr[i]["bin_upper_bound"].array_items();
for (int j = 0; j < bounds_arr.size(); ++j) {
forced_bins[feature_num].push_back(bounds_arr[j].number_value());
}
}
// remove duplicates
for (int i = 0; i < num_total_features; ++i) {
auto new_end = std::unique(forced_bins[i].begin(), forced_bins[i].end());
forced_bins[i].erase(new_end, forced_bins[i].end());
}
}
// remove duplicates
for (int i = 0; i < num_total_features; ++i) {
auto new_end = std::unique(forced_bins[i].begin(), forced_bins[i].end());
forced_bins[i].erase(new_end, forced_bins[i].end());
}
}
return forced_bins;
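For orientation, a rough Python analogue of the updated GetForcedBins logic (parse the JSON, collect per-feature upper bounds, skip unreadable files, and drop adjacent duplicates the way std::unique does); a sketch for illustration, not the shipped implementation:

    import json

    def get_forced_bins(path, num_total_features):
        """Rough Python analogue of Dataset::GetForcedBins (illustrative only)."""
        forced_bins = [[] for _ in range(num_total_features)]
        if not path:
            return forced_bins
        try:
            with open(path) as f:
                entries = json.load(f)
        except OSError:
            print("Could not open %s. Will ignore." % path)
            return forced_bins
        for entry in entries:
            feature_num = entry["feature"]
            assert feature_num < num_total_features
            forced_bins[feature_num].extend(entry["bin_upper_bound"])
        # std::unique removes only *adjacent* duplicates, so mirror that behavior.
        for i, bounds in enumerate(forced_bins):
            forced_bins[i] = [b for j, b in enumerate(bounds) if j == 0 or b != bounds[j - 1]]
        return forced_bins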
1 change: 1 addition & 0 deletions src/io/dataset_loader.cpp
@@ -2,6 +2,7 @@
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/

#include <LightGBM/dataset_loader.h>
#include <LightGBM/network.h>
#include <LightGBM/utils/array_args.h>
3 changes: 2 additions & 1 deletion tests/python_package_test/test_engine.py
@@ -1550,7 +1550,8 @@ def test_forced_bins(self):
x[:, 0] = np.arange(0, 1, 0.01)
x[:, 1] = -np.arange(0, 1, 0.01)
y = np.arange(0, 1, 0.01)
forcedbins_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/forced_bins.json')
forcedbins_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/regression/forced_bins.json')
params = {'objective': 'regression_l1',
'max_bin': 6,
'forcedbins_filename': forcedbins_filename,
