From 5b21573ecb4dd9e463e47783b9a8309f000c6bf2 Mon Sep 17 00:00:00 2001
From: btrotta <btrotta@users.noreply.github.com>
Date: Wed, 14 Aug 2019 20:10:21 +1000
Subject: [PATCH] Fix style issues.

---
 docs/Parameters.rst                           |  4 +-
 .../regression}/forced_bins.json              |  2 +-
 examples/regression/train.conf                |  3 ++
 include/LightGBM/config.h                     |  4 +-
 src/io/bin.cpp                                |  2 +-
 src/io/dataset.cpp                            | 40 ++++++++++---------
 src/io/dataset_loader.cpp                     |  1 +
 tests/python_package_test/test_engine.py      |  3 +-
 8 files changed, 34 insertions(+), 25 deletions(-)
 rename {tests/data => examples/regression}/forced_bins.json (98%)
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 584237464fd1..83a04b992393 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -408,9 +408,9 @@ Learning Control Parameters
 
    -  path to a ``.json`` file that specifies bin upper bounds for some or all features
 
-   -  ``.json`` file should contain an array of objects, each containing the name ``feature`` (integer feature number) and ``bin_upper_bounds`` (array of thresolds for binning)
+   -  ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning)
 
-   -  see `this file <https://github.com/microsoft/LightGBM/tree/master/tests/data/forced_bins.json>`__ as an example
+   -  see `this file <https://github.com/microsoft/LightGBM/tree/master/examples/regression/forced_bins.json>`__ as an example
 
 -  ``refit_decay_rate`` :raw-html:`<a id="refit_decay_rate" title="Permalink to this parameter" href="#refit_decay_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.9``, type = double, constraints: ``0.0 <= refit_decay_rate <= 1.0``
 
diff --git a/tests/data/forced_bins.json b/examples/regression/forced_bins.json
similarity index 98%
rename from tests/data/forced_bins.json
rename to examples/regression/forced_bins.json
index aa74c36ffb78..1ee0a49d727c 100644
--- a/tests/data/forced_bins.json
+++ b/examples/regression/forced_bins.json
@@ -7,4 +7,4 @@
         "feature": 1,
         "bin_upper_bound": [ -0.1, -0.15, -0.2 ]
     }
-]
\ No newline at end of file
+]
diff --git a/examples/regression/train.conf b/examples/regression/train.conf
index 11396c23ecc2..4c73169dc8f9 100644
--- a/examples/regression/train.conf
+++ b/examples/regression/train.conf
@@ -29,6 +29,9 @@ is_training_metric = true
 # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. 
 max_bin = 255
 
+# forced bin thresholds
+# forcedbins_filename = forced_bins.json
+
 # training data
 # if exsting weight file, should name to "regression.train.weight"
 # alias: train_data, train
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 1c0c14f69508..89fa57453c88 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -403,8 +403,8 @@ struct Config {
   std::string forcedsplits_filename = "";
 
   // desc = path to a ``.json`` file that specifies bin upper bounds for some or all features
-  // desc = ``.json`` file should contain an array of objects, each containing the name ``feature`` (integer feature number) and ``bin_upper_bounds`` (array of thresolds for binning)
-  // desc = see `this file <https://github.com/microsoft/LightGBM/tree/master/tests/data/forced_bins.json>`__ as an example
+  // desc = ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning)
+  // desc = see `this file <https://github.com/microsoft/LightGBM/tree/master/examples/regression/forced_bins.json>`__ as an example
   std::string forcedbins_filename = "";
 
   // check = >=0.0
diff --git a/src/io/bin.cpp b/src/io/bin.cpp
index 62713d1bddd3..2556a59b4715 100644
--- a/src/io/bin.cpp
+++ b/src/io/bin.cpp
@@ -320,7 +320,7 @@ namespace LightGBM {
         }
       } else if (missing_type_ == MissingType::None) {
         bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, 
-                                                  min_data_in_bin, forced_upper_bounds);
+                                                   min_data_in_bin, forced_upper_bounds);
       } else {
         bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin - 1, total_sample_cnt - na_cnt, 
                                                    min_data_in_bin, forced_upper_bounds);
diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp
index c931e945cd24..269c06c4c37d 100644
--- a/src/io/dataset.cpp
+++ b/src/io/dataset.cpp
@@ -5,10 +5,10 @@
 #include <LightGBM/dataset.h>
 
 #include <LightGBM/feature_group.h>
+#include <LightGBM/json11.hpp>
 #include <LightGBM/utils/array_args.h>
 #include <LightGBM/utils/openmp_wrapper.h>
 #include <LightGBM/utils/threading.h>
-#include <LightGBM/json11.hpp>
 
 #include <limits>
 #include <chrono>
@@ -1071,24 +1071,28 @@ std::vector<std::vector<double>> Dataset::GetForcedBins(std::string forced_bins_
   std::vector<std::vector<double>> forced_bins(num_total_features, std::vector<double>());
   if (forced_bins_path != "") {
     std::ifstream forced_bins_stream(forced_bins_path.c_str());
-    std::stringstream buffer;
-    buffer << forced_bins_stream.rdbuf();
-    std::string err;
-    Json forced_bins_json = Json::parse(buffer.str(), err);
-    CHECK(forced_bins_json.is_array());
-    std::vector<Json> forced_bins_arr = forced_bins_json.array_items();
-    for (int i = 0; i < forced_bins_arr.size(); ++i) {
-      int feature_num = forced_bins_arr[i]["feature"].int_value();
-      CHECK(feature_num < num_total_features);
-      std::vector<Json> bounds_arr = forced_bins_arr[i]["bin_upper_bound"].array_items();
-      for (int j = 0; j < bounds_arr.size(); ++j) {
-        forced_bins[feature_num].push_back(bounds_arr[j].number_value());
+    if (forced_bins_stream.fail()) {
+      Log::Warning("Could not open %s. Will ignore.", forced_bins_path.c_str());
+    } else {
+      std::stringstream buffer;
+      buffer << forced_bins_stream.rdbuf();
+      std::string err;
+      Json forced_bins_json = Json::parse(buffer.str(), err);
+      CHECK(forced_bins_json.is_array());
+      std::vector<Json> forced_bins_arr = forced_bins_json.array_items();
+      for (int i = 0; i < forced_bins_arr.size(); ++i) {
+        int feature_num = forced_bins_arr[i]["feature"].int_value();
+        CHECK(feature_num < num_total_features);
+        std::vector<Json> bounds_arr = forced_bins_arr[i]["bin_upper_bound"].array_items();
+        for (int j = 0; j < bounds_arr.size(); ++j) {
+          forced_bins[feature_num].push_back(bounds_arr[j].number_value());
+        }
+      }
+      // remove duplicates
+      for (int i = 0; i < num_total_features; ++i) {
+        auto new_end = std::unique(forced_bins[i].begin(), forced_bins[i].end());
+        forced_bins[i].erase(new_end, forced_bins[i].end());
       }
-    }
-    // remove duplicates
-    for (int i = 0; i < num_total_features; ++i) {
-      auto new_end = std::unique(forced_bins[i].begin(), forced_bins[i].end());
-      forced_bins[i].erase(new_end, forced_bins[i].end());
     }
   }
   return forced_bins;
diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp
index f36d5b1df27d..eb83d74bfe3d 100644
--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -2,6 +2,7 @@
  * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License. See LICENSE file in the project root for license information.
  */
+
 #include <LightGBM/dataset_loader.h>
 #include <LightGBM/network.h>
 #include <LightGBM/utils/array_args.h>
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 59ea0113f50a..d55bac7711a1 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1550,7 +1550,8 @@ def test_forced_bins(self):
         x[:, 0] = np.arange(0, 1, 0.01)
         x[:, 1] = -np.arange(0, 1, 0.01)
         y = np.arange(0, 1, 0.01)
-        forcedbins_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/forced_bins.json')
+        forcedbins_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                           '../../examples/regression/forced_bins.json')
         params = {'objective': 'regression_l1',
                   'max_bin': 6,
                   'forcedbins_filename': forcedbins_filename,