diff --git a/.gitmodules b/.gitmodules index 133ceb3889da..8f1772cd19fa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "include/boost/compute"] path = compute url = https://github.com/boostorg/compute +[submodule "external_libs/fmt"] + path = external_libs/fmt + url = https://github.com/fmtlib/fmt.git +[submodule "external_libs/fast_double_parser"] + path = external_libs/fast_double_parser + url = https://github.com/lemire/fast_double_parser.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 04b8861b12ca..95508970e8de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -444,6 +444,11 @@ if(__BUILD_FOR_R) endif(MSVC) endif(__BUILD_FOR_R) +# fmtlib/fmt +add_subdirectory(external_libs/fmt) +TARGET_LINK_LIBRARIES(lightgbm PUBLIC fmt::fmt) +TARGET_LINK_LIBRARIES(_lightgbm PUBLIC fmt::fmt) + install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib diff --git a/external_libs/fast_double_parser b/external_libs/fast_double_parser new file mode 160000 index 000000000000..98c751de3e26 --- /dev/null +++ b/external_libs/fast_double_parser @@ -0,0 +1 @@ +Subproject commit 98c751de3e2681f9baf4e95b3956e0723cbdf5ed diff --git a/external_libs/fmt b/external_libs/fmt new file mode 160000 index 000000000000..2e620ddbcd6c --- /dev/null +++ b/external_libs/fmt @@ -0,0 +1 @@ +Subproject commit 2e620ddbcd6c18c13fbc48b3cf837817c87281f3 diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 07b8484b5577..1db8a577d2de 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -5,6 +5,8 @@ #ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ #define LIGHTGBM_UTILS_COMMON_FUN_H_ +#include "../../../external_libs/fmt/include/fmt/format.h" +#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" #include #include @@ -15,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +54,13 @@ namespace LightGBM 
{ namespace Common { +/*! +* Imbues the stream with the C locale. +*/ +static void C_stringstream(std::stringstream &ss) { + ss.imbue(std::locale::classic()); +} + inline static char tolower(char in) { if (in <= 'Z' && in >= 'A') return in - ('Z' - 'z'); @@ -329,94 +339,6 @@ inline static bool AtofAndCheck(const char* p, double* out) { return true; } -inline static unsigned CountDecimalDigit32(uint32_t n) { -#if defined(_MSC_VER) || defined(__GNUC__) - static const uint32_t powers_of_10[] = { - 0, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000 - }; -#ifdef _MSC_VER - // NOLINTNEXTLINE - unsigned long i = 0; - _BitScanReverse(&i, n | 1); - uint32_t t = (i + 1) * 1233 >> 12; -#elif __GNUC__ - uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; -#endif - return t - (n < powers_of_10[t]) + 1; -#else - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -#endif -} - -inline static void Uint32ToStr(uint32_t value, char* buffer) { - const char kDigitsLut[200] = { - '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', - '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', - '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', - '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', - '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', - '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', - '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', - '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', 
'5', '7', '6', '7', '7', '7', '8', '7', '9', - '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', - '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' - }; - unsigned digit = CountDecimalDigit32(value); - buffer += digit; - *buffer = '\0'; - - while (value >= 100) { - const unsigned i = (value % 100) << 1; - value /= 100; - *--buffer = kDigitsLut[i + 1]; - *--buffer = kDigitsLut[i]; - } - - if (value < 10) { - *--buffer = static_cast(value) + '0'; - } else { - const unsigned i = value << 1; - *--buffer = kDigitsLut[i + 1]; - *--buffer = kDigitsLut[i]; - } -} - -inline static void Int32ToStr(int32_t value, char* buffer) { - uint32_t u = static_cast(value); - if (value < 0) { - *buffer++ = '-'; - u = ~u + 1; - } - Uint32ToStr(u, buffer); -} - -inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) { - #ifdef _MSC_VER - int num_chars = sprintf_s(buffer, buffer_len, "%.17g", value); - #else - int num_chars = snprintf(buffer, buffer_len, "%.17g", value); - #endif - CHECK_GE(num_chars, 0); -} - inline static const char* SkipSpaceAndTab(const char* p) { while (*p == ' ' || *p == '\t') { ++p; @@ -440,67 +362,6 @@ inline static std::vector ArrayCast(const std::vector& arr) { return ret; } -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t) const { - Int32ToStr(value, buffer); - } -}; - -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t buf_len) - const { - #ifdef _MSC_VER - int num_chars = sprintf_s(buffer, buf_len, "%g", value); - #else - int num_chars = snprintf(buffer, buf_len, "%g", value); - #endif - CHECK_GE(num_chars, 0); - } -}; - -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t) const { - Uint32ToStr(value, buffer); - } -}; - -template -inline static std::string ArrayToStringFast(const std::vector& arr, size_t n) { - if 
(arr.empty() || n == 0) { - return std::string(""); - } - __TToStringHelperFast::value, std::is_unsigned::value> helper; - const size_t buf_len = 16; - std::vector buffer(buf_len); - std::stringstream str_buf; - helper(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - helper(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); - } - return str_buf.str(); -} - -inline static std::string ArrayToString(const std::vector& arr, size_t n) { - if (arr.empty() || n == 0) { - return std::string(""); - } - const size_t buf_len = 32; - std::vector buffer(buf_len); - std::stringstream str_buf; - DoubleToStr(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - DoubleToStr(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); - } - return str_buf.str(); -} - template struct __StringToTHelper { T operator()(const std::string& str) const { @@ -588,11 +449,14 @@ inline static std::vector StringToArrayFast(const std::string& str, int n) { } template -inline static std::string Join(const std::vector& strs, const char* delimiter) { +inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale=false) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[0]; for (size_t i = 1; i < strs.size(); ++i) { @@ -603,11 +467,14 @@ inline static std::string Join(const std::vector& strs, const char* delimiter } template<> -inline std::string Join(const std::vector& strs, const char* delimiter) { +inline std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << 
std::setprecision(std::numeric_limits::digits10 + 2); str_buf << static_cast(strs[0]); for (size_t i = 1; i < strs.size(); ++i) { @@ -618,13 +485,16 @@ inline std::string Join(const std::vector& strs, const char* del } template -inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { +inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter, const bool force_C_locale=false) { if (end - start <= 0) { return std::string(""); } start = std::min(start, static_cast(strs.size()) - 1); end = std::min(end, static_cast(strs.size())); std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[start]; for (size_t i = start + 1; i < end; ++i) { @@ -1137,6 +1007,213 @@ class FunctionTimer { extern Common::Timer global_timer; + + /*! + * Provides locale-independent alternatives to Common's methods. + * Essential to make models robust to locale settings. + */ + namespace CommonC { + + template + inline static std::string Join(const std::vector& strs, const char* delimiter) { + return LightGBM::Common::Join(strs, delimiter, true); + } + + template + inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { + return LightGBM::Common::Join(strs, start, end, delimiter, true); + } + + inline static const char* Atof(const char* p, double* out) { + return LightGBM::Common::Atof(p, out); + } + + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + return LightGBM::Common::Atoi(p, out); + } + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. 
+ * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + */ + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + double tmp = 0.0f; + auto ret = Atof(p, &tmp); + *out = static_cast(tmp); + return ret; + } + }; + + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + T ret = 0; + LightGBM::Common::Atoi(str.c_str(), &ret); + return ret; + } + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. + */ + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + double tmp; + + // Fast (common) path: For numeric inputs in RFC 7159 format: + const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); + + // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: + if (!fast_parse_succeeded) { + std::stringstream ss; + Common::C_stringstream(ss); + ss << str; + ss >> tmp; + } + + return static_cast(tmp); + } + }; + + /*! + * Safely formats a value onto a buffer according to a format string and null-terminates it. + * + * \note It checks that the full value was written or forcefully aborts. + * This safety check serves to prevent incorrect internal API usage. + * Correct usage will never incur this problem: + * - The received buffer size shall be sufficient at all times for the input format string and value. 
+ */ + template + inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { + auto result = fmt::format_to_n(buffer, buf_len, format, value); + if (result.size >= buf_len) { + Log::Fatal("Numerical conversion failed. Buffer is too small."); + } + buffer[result.size] = '\0'; + } + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{}", value); + } + }; + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:g}", value); + } + }; + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:.17g}", value); + } + }; + + /*! + * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, + * this method has less precision for floating point numbers than ``StringToArray``, + * which calls ``__StringToTHelper``. + * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! + * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + */ + template + inline static std::vector StringToArrayFast(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + auto p_str = str.c_str(); + __StringToTHelperFast::value> helper; + std::vector ret(n); + for (int i = 0; i < n; ++i) { + p_str = helper(p_str, &ret[i]); + } + return ret; + } + + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. 
+ */ template + inline static std::vector StringToArray(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); + CHECK_EQ(strs.size(), static_cast(n)); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; + } + + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. + */ + template + inline static std::vector StringToArray(const std::string& str, char delimiter) { + std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; + } + + /*! + * Converts an array to a string with values separated by the space character. + * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality + * and is locale-independent. + * + * \note If ``high_precision_output`` is set to true, + * floating point values are output with more digits of precision. + */ + template + inline static std::string ArrayToString(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + __TToStringHelper::value, high_precision_output> helper; + const size_t buf_len = high_precision_output ? 
32 : 16; + std::vector buffer(buf_len); + std::stringstream str_buf; + Common::C_stringstream(str_buf); + helper(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + helper(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); + } + + } // Namespace CommonC + + } // namespace LightGBM #endif // LightGBM_UTILS_COMMON_FUN_H_ diff --git a/python-package/setup.py b/python-package/setup.py index 4e3242fe2191..61a69c42c299 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -43,6 +43,7 @@ def copy_files_helper(folder_name): if not os.path.isfile(os.path.join(CURRENT_DIR, '_IS_SOURCE_PACKAGE.txt')): copy_files_helper('include') copy_files_helper('src') + copy_files_helper('external_libs') if not os.path.exists(os.path.join(CURRENT_DIR, "compile", "windows")): os.makedirs(os.path.join(CURRENT_DIR, "compile", "windows")) copy_file(os.path.join(CURRENT_DIR, os.path.pardir, "windows", "LightGBM.sln"), diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 4eeb731f587f..e5cec8b61db0 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -20,6 +20,7 @@ const char* kModelVersion = "v3"; std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_importance_type) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "{"; str_buf << "\"name\":\"" << SubModelName() << "\"," << '\n'; @@ -34,16 +35,17 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ str_buf << "\"average_output\":" << (average_output_ ? 
"true" : "false") << ",\n"; - str_buf << "\"feature_names\":[\"" << Common::Join(feature_names_, "\",\"") + str_buf << "\"feature_names\":[\"" << CommonC::Join(feature_names_, "\",\"") << "\"]," << '\n'; str_buf << "\"monotone_constraints\":[" - << Common::Join(monotone_constraints_, ",") << "]," << '\n'; + << CommonC::Join(monotone_constraints_, ",") << "]," << '\n'; str_buf << "\"feature_infos\":" << "{"; bool first_obj = true; for (size_t i = 0; i < feature_infos_.size(); ++i) { std::stringstream json_str_buf; + Common::C_stringstream(json_str_buf); auto strs = Common::Split(feature_infos_[i].c_str(), ":"); if (strs[0][0] == '[') { strs[0].erase(0, 1); // remove '[' @@ -56,12 +58,12 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ json_str_buf << "\"max_value\":" << Common::AvoidInf(max_) << ","; json_str_buf << "\"values\":[]}"; } else if (strs[0] != "none") { // categorical feature - auto vals = Common::StringToArray(feature_infos_[i], ':'); + auto vals = CommonC::StringToArray(feature_infos_[i], ':'); auto max_idx = ArrayArgs::ArgMax(vals); auto min_idx = ArrayArgs::ArgMin(vals); json_str_buf << "{\"min_value\":" << vals[min_idx] << ","; json_str_buf << "\"max_value\":" << vals[max_idx] << ","; - json_str_buf << "\"values\":[" << Common::Join(vals, ",") << "]}"; + json_str_buf << "\"values\":[" << CommonC::Join(vals, ",") << "]}"; } else { // unused feature continue; } @@ -121,6 +123,7 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ std::string GBDT::ModelToIfElse(int num_iteration) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "#include \"gbdt.h\"" << '\n'; str_buf << "#include " << '\n'; @@ -155,6 +158,7 @@ std::string GBDT::ModelToIfElse(int num_iteration) const { str_buf << " };" << '\n' << '\n'; std::stringstream pred_str_buf; + Common::C_stringstream(pred_str_buf); pred_str_buf << "\t" << "int early_stop_round_counter = 0;" << '\n'; pred_str_buf << 
"\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n'; @@ -186,6 +190,7 @@ std::string GBDT::ModelToIfElse(int num_iteration) const { str_buf << " };" << '\n' << '\n'; std::stringstream pred_str_buf_map; + Common::C_stringstream(pred_str_buf_map); pred_str_buf_map << "\t" << "int early_stop_round_counter = 0;" << '\n'; pred_str_buf_map << "\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n'; @@ -305,6 +310,7 @@ bool GBDT::SaveModelToIfElse(int num_iteration, const char* filename) const { std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int feature_importance_type) const { std::stringstream ss; + Common::C_stringstream(ss); // output model type ss << SubModelName() << '\n'; @@ -325,14 +331,14 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int ss << "average_output" << '\n'; } - ss << "feature_names=" << Common::Join(feature_names_, " ") << '\n'; + ss << "feature_names=" << CommonC::Join(feature_names_, " ") << '\n'; if (monotone_constraints_.size() != 0) { - ss << "monotone_constraints=" << Common::Join(monotone_constraints_, " ") + ss << "monotone_constraints=" << CommonC::Join(monotone_constraints_, " ") << '\n'; } - ss << "feature_infos=" << Common::Join(feature_infos_, " ") << '\n'; + ss << "feature_infos=" << CommonC::Join(feature_infos_, " ") << '\n'; int num_used_model = static_cast(models_.size()); int total_iteration = num_used_model / num_tree_per_iteration_; @@ -356,7 +362,7 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int tree_sizes[idx] = tree_strs[idx].size(); } - ss << "tree_sizes=" << Common::Join(tree_sizes, " ") << '\n'; + ss << "tree_sizes=" << CommonC::Join(tree_sizes, " ") << '\n'; ss << '\n'; for (int i = 0; i < num_used_model - start_model; ++i) { @@ -491,7 +497,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { // get monotone_constraints if 
(key_vals.count("monotone_constraints")) { - monotone_constraints_ = Common::StringToArray(key_vals["monotone_constraints"].c_str(), ' '); + monotone_constraints_ = CommonC::StringToArray(key_vals["monotone_constraints"].c_str(), ' '); if (monotone_constraints_.size() != static_cast(max_feature_idx_ + 1)) { Log::Fatal("Wrong size of monotone_constraints"); return false; @@ -533,7 +539,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { p = Common::SkipNewLine(p); } } else { - std::vector tree_sizes = Common::StringToArray(key_vals["tree_sizes"].c_str(), ' '); + std::vector tree_sizes = CommonC::StringToArray(key_vals["tree_sizes"].c_str(), ' '); std::vector tree_boundries(tree_sizes.size() + 1, 0); int num_trees = static_cast(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { @@ -564,6 +570,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { iter_ = 0; bool is_inparameter = false; std::stringstream ss; + Common::C_stringstream(ss); while (p < end) { auto line_len = Common::GetLine(p); if (line_len > 0) { diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 8e5104f168eb..991e13ef5dec 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -222,37 +222,39 @@ double Tree::GetLowerBoundValue() const { std::string Tree::ToString() const { std::stringstream str_buf; + Common::C_stringstream(str_buf); + str_buf << "num_leaves=" << num_leaves_ << '\n'; str_buf << "num_cat=" << num_cat_ << '\n'; str_buf << "split_feature=" - << Common::ArrayToStringFast(split_feature_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(split_feature_, num_leaves_ - 1) << '\n'; str_buf << "split_gain=" - << Common::ArrayToStringFast(split_gain_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(split_gain_, num_leaves_ - 1) << '\n'; str_buf << "threshold=" - << Common::ArrayToString(threshold_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(threshold_, num_leaves_ - 1) << '\n'; str_buf << "decision_type=" - << 
Common::ArrayToStringFast(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; str_buf << "left_child=" - << Common::ArrayToStringFast(left_child_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(left_child_, num_leaves_ - 1) << '\n'; str_buf << "right_child=" - << Common::ArrayToStringFast(right_child_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(right_child_, num_leaves_ - 1) << '\n'; str_buf << "leaf_value=" - << Common::ArrayToString(leaf_value_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_value_, num_leaves_) << '\n'; str_buf << "leaf_weight=" - << Common::ArrayToString(leaf_weight_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_weight_, num_leaves_) << '\n'; str_buf << "leaf_count=" - << Common::ArrayToStringFast(leaf_count_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_count_, num_leaves_) << '\n'; str_buf << "internal_value=" - << Common::ArrayToStringFast(internal_value_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_value_, num_leaves_ - 1) << '\n'; str_buf << "internal_weight=" - << Common::ArrayToStringFast(internal_weight_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_weight_, num_leaves_ - 1) << '\n'; str_buf << "internal_count=" - << Common::ArrayToStringFast(internal_count_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_count_, num_leaves_ - 1) << '\n'; if (num_cat_ > 0) { str_buf << "cat_boundaries=" - << Common::ArrayToStringFast(cat_boundaries_, num_cat_ + 1) << '\n'; + << CommonC::ArrayToString(cat_boundaries_, num_cat_ + 1) << '\n'; str_buf << "cat_threshold=" - << Common::ArrayToStringFast(cat_threshold_, cat_threshold_.size()) << '\n'; + << CommonC::ArrayToString(cat_threshold_, cat_threshold_.size()) << '\n'; } str_buf << "shrinkage=" << shrinkage_ << '\n'; str_buf << '\n'; @@ -261,6 +263,7 @@ std::string Tree::ToString() const { std::string 
Tree::ToJSON() const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << "\"num_leaves\":" << num_leaves_ << "," << '\n'; str_buf << "\"num_cat\":" << num_cat_ << "," << '\n'; @@ -276,6 +279,7 @@ std::string Tree::ToJSON() const { std::string Tree::NodeToJSON(int index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -295,7 +299,7 @@ std::string Tree::NodeToJSON(int index) const { } } } - str_buf << "\"threshold\":\"" << Common::Join(cats, "||") << "\"," << '\n'; + str_buf << "\"threshold\":\"" << CommonC::Join(cats, "||") << "\"," << '\n'; str_buf << "\"decision_type\":\"==\"," << '\n'; } else { str_buf << "\"threshold\":" << Common::AvoidInf(threshold_[index]) << "," << '\n'; @@ -336,6 +340,7 @@ std::string Tree::NodeToJSON(int index) const { std::string Tree::NumericalDecisionIfElse(int node) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); uint8_t missing_type = GetMissingType(decision_type_[node]); bool default_left = GetDecisionType(decision_type_[node], kDefaultLeftMask); if (missing_type == MissingType::None @@ -360,6 +365,7 @@ std::string Tree::NumericalDecisionIfElse(int node) const { std::string Tree::CategoricalDecisionIfElse(int node) const { uint8_t missing_type = GetMissingType(decision_type_[node]); std::stringstream str_buf; + Common::C_stringstream(str_buf); if (missing_type == MissingType::NaN) { str_buf << "if (std::isnan(fval)) { int_fval = -1; } else { int_fval = static_cast(fval); }"; } else { @@ -375,6 +381,7 @@ std::string Tree::CategoricalDecisionIfElse(int node) const { std::string Tree::ToIfElse(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "double PredictTree" << index; if (predict_leaf_index) { str_buf << "Leaf"; @@ -433,6 +440,7 @@ 
std::string Tree::ToIfElse(int index, bool predict_leaf_index) const { std::string Tree::NodeToIfElse(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -464,6 +472,7 @@ std::string Tree::NodeToIfElse(int index, bool predict_leaf_index) const { std::string Tree::NodeToIfElseByMap(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -526,13 +535,13 @@ Tree::Tree(const char* str, size_t* used_len) { Common::Atoi(key_vals["num_cat"].c_str(), &num_cat_); if (key_vals.count("leaf_value")) { - leaf_value_ = Common::StringToArray(key_vals["leaf_value"], num_leaves_); + leaf_value_ = CommonC::StringToArray(key_vals["leaf_value"], num_leaves_); } else { Log::Fatal("Tree model string format error, should contain leaf_value field"); } if (key_vals.count("shrinkage")) { - Common::Atof(key_vals["shrinkage"].c_str(), &shrinkage_); + CommonC::Atof(key_vals["shrinkage"].c_str(), &shrinkage_); } else { shrinkage_ = 1.0f; } @@ -540,80 +549,80 @@ Tree::Tree(const char* str, size_t* used_len) { if (num_leaves_ <= 1) { return; } if (key_vals.count("left_child")) { - left_child_ = Common::StringToArrayFast(key_vals["left_child"], num_leaves_ - 1); + left_child_ = CommonC::StringToArrayFast(key_vals["left_child"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain left_child field"); } if (key_vals.count("right_child")) { - right_child_ = Common::StringToArrayFast(key_vals["right_child"], num_leaves_ - 1); + right_child_ = CommonC::StringToArrayFast(key_vals["right_child"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain right_child field"); } if (key_vals.count("split_feature")) { - split_feature_ = 
Common::StringToArrayFast(key_vals["split_feature"], num_leaves_ - 1); + split_feature_ = CommonC::StringToArrayFast(key_vals["split_feature"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain split_feature field"); } if (key_vals.count("threshold")) { - threshold_ = Common::StringToArray(key_vals["threshold"], num_leaves_ - 1); + threshold_ = CommonC::StringToArray(key_vals["threshold"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain threshold field"); } if (key_vals.count("split_gain")) { - split_gain_ = Common::StringToArrayFast(key_vals["split_gain"], num_leaves_ - 1); + split_gain_ = CommonC::StringToArrayFast(key_vals["split_gain"], num_leaves_ - 1); } else { split_gain_.resize(num_leaves_ - 1); } if (key_vals.count("internal_count")) { - internal_count_ = Common::StringToArrayFast(key_vals["internal_count"], num_leaves_ - 1); + internal_count_ = CommonC::StringToArrayFast(key_vals["internal_count"], num_leaves_ - 1); } else { internal_count_.resize(num_leaves_ - 1); } if (key_vals.count("internal_value")) { - internal_value_ = Common::StringToArrayFast(key_vals["internal_value"], num_leaves_ - 1); + internal_value_ = CommonC::StringToArrayFast(key_vals["internal_value"], num_leaves_ - 1); } else { internal_value_.resize(num_leaves_ - 1); } if (key_vals.count("internal_weight")) { - internal_weight_ = Common::StringToArrayFast(key_vals["internal_weight"], num_leaves_ - 1); + internal_weight_ = CommonC::StringToArrayFast(key_vals["internal_weight"], num_leaves_ - 1); } else { internal_weight_.resize(num_leaves_ - 1); } if (key_vals.count("leaf_weight")) { - leaf_weight_ = Common::StringToArray(key_vals["leaf_weight"], num_leaves_); + leaf_weight_ = CommonC::StringToArray(key_vals["leaf_weight"], num_leaves_); } else { leaf_weight_.resize(num_leaves_); } if (key_vals.count("leaf_count")) { - leaf_count_ = Common::StringToArrayFast(key_vals["leaf_count"], num_leaves_); + leaf_count_ = 
CommonC::StringToArrayFast(key_vals["leaf_count"], num_leaves_); } else { leaf_count_.resize(num_leaves_); } if (key_vals.count("decision_type")) { - decision_type_ = Common::StringToArrayFast(key_vals["decision_type"], num_leaves_ - 1); + decision_type_ = CommonC::StringToArrayFast(key_vals["decision_type"], num_leaves_ - 1); } else { decision_type_ = std::vector(num_leaves_ - 1, 0); } if (num_cat_ > 0) { if (key_vals.count("cat_boundaries")) { - cat_boundaries_ = Common::StringToArrayFast(key_vals["cat_boundaries"], num_cat_ + 1); + cat_boundaries_ = CommonC::StringToArrayFast(key_vals["cat_boundaries"], num_cat_ + 1); } else { Log::Fatal("Tree model should contain cat_boundaries field."); } if (key_vals.count("cat_threshold")) { - cat_threshold_ = Common::StringToArrayFast(key_vals["cat_threshold"], cat_boundaries_.back()); + cat_threshold_ = CommonC::StringToArrayFast(key_vals["cat_threshold"], cat_boundaries_.back()); } else { Log::Fatal("Tree model should contain cat_threshold field"); }