From 4bf22a903df0c50085cd4b323c6d065596547b05 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 11 Sep 2020 17:42:55 +0100 Subject: [PATCH 01/33] Fix LightGBM models locale sensitivity and improve R/W performance. When Java is used, the default C++ locale is broken. This is true for Java providers that use the C API or even Python models that require JEP. This patch solves that issue making the model reads/writes insensitive to such settings. To achieve it, within the model read/write codebase: - C++ streams are imbued with the classic locale - Calls to functions that are dependent on the locale are replaced - The default locale is not changed! This approach means: - The user's locale is never tampered with, avoiding issues such as https://github.com/microsoft/LightGBM/issues/2979 with the previous approach https://github.com/microsoft/LightGBM/pull/2891 - Datasets can still be read according the user's locale - The model file has a single format independent of locale Changes: - Add CommonC namespace which provides faster locale-independent versions of Common's methods - Model code makes conversions through CommonC - Cleanup unused Common methods - Performance improvements. Use fast libraries for locale-agnostic conversion: - value->string: https://github.com/fmtlib/fmt - string->double: https://github.com/lemire/fast_double_parser (10x faster double parsing according to their benchmark) Bugfixes: - https://github.com/microsoft/LightGBM/issues/2500 - https://github.com/microsoft/LightGBM/issues/2890 - https://github.com/ninia/jep/issues/205 (as it is related to LGBM as well) --- .gitmodules | 6 + CMakeLists.txt | 5 + external_libs/fast_double_parser | 1 + external_libs/fmt | 1 + include/LightGBM/utils/common.h | 381 +++++++++++++++++++------------ src/boosting/gbdt_model_text.cpp | 27 ++- src/io/tree.cpp | 69 +++--- 7 files changed, 298 insertions(+), 192 deletions(-) create mode 160000 external_libs/fast_double_parser create mode 160000 external_libs/fmt diff --git a/.gitmodules b/.gitmodules index 133ceb3889da..8f1772cd19fa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "include/boost/compute"] path = compute url = https://github.com/boostorg/compute +[submodule "external_libs/fmt"] + path = external_libs/fmt + url = https://github.com/fmtlib/fmt.git +[submodule "external_libs/fast_double_parser"] + path = external_libs/fast_double_parser + url = https://github.com/lemire/fast_double_parser.git diff --git a/CMakeLists.txt b/CMakeLists.txt index c8559c966ab1..42f93ae04db1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -450,6 +450,11 @@ if(__BUILD_FOR_R) endif(MSVC) endif(__BUILD_FOR_R) +# fmtlib/fmt +add_subdirectory(external_libs/fmt) +TARGET_LINK_LIBRARIES(lightgbm PUBLIC fmt::fmt) +TARGET_LINK_LIBRARIES(_lightgbm PUBLIC fmt::fmt) + install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib diff --git a/external_libs/fast_double_parser b/external_libs/fast_double_parser new file mode 160000 index 000000000000..98c751de3e26 --- /dev/null +++ b/external_libs/fast_double_parser @@ -0,0 +1 @@ +Subproject commit 98c751de3e2681f9baf4e95b3956e0723cbdf5ed diff --git a/external_libs/fmt b/external_libs/fmt new file mode 160000 index 000000000000..2e620ddbcd6c --- /dev/null +++ b/external_libs/fmt @@ -0,0 +1 @@ +Subproject commit 2e620ddbcd6c18c13fbc48b3cf837817c87281f3 diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 07b8484b5577..cc3e7c9e9997 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -5,6 +5,8 @@ #ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ #define LIGHTGBM_UTILS_COMMON_FUN_H_ +#include "../../../external_libs/fmt/include/fmt/format.h" +#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" #include #include @@ -15,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +54,13 @@ namespace LightGBM { namespace Common { +/*! +* Imbues the stream with the C locale. +*/ +static void C_stringstream(std::stringstream &ss) { + ss.imbue(std::locale::classic()); +} + inline static char tolower(char in) { if (in <= 'Z' && in >= 'A') return in - ('Z' - 'z'); @@ -329,94 +339,6 @@ inline static bool AtofAndCheck(const char* p, double* out) { return true; } -inline static unsigned CountDecimalDigit32(uint32_t n) { -#if defined(_MSC_VER) || defined(__GNUC__) - static const uint32_t powers_of_10[] = { - 0, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000 - }; -#ifdef _MSC_VER - // NOLINTNEXTLINE - unsigned long i = 0; - _BitScanReverse(&i, n | 1); - uint32_t t = (i + 1) * 1233 >> 12; -#elif __GNUC__ - uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; -#endif - return t - (n < powers_of_10[t]) + 1; -#else - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -#endif -} - -inline static void Uint32ToStr(uint32_t value, char* buffer) { - const char kDigitsLut[200] = { - '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', - '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', - '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', - '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', - '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', - '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', - '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', - '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', - '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', - '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' - }; - unsigned digit = CountDecimalDigit32(value); - buffer += digit; - *buffer = '\0'; - - while (value >= 100) { - const unsigned i = (value % 100) << 1; - value /= 100; - *--buffer = kDigitsLut[i + 1]; - *--buffer = kDigitsLut[i]; - } - - if (value < 10) { - *--buffer = static_cast(value) + '0'; - } else { - const unsigned i = value << 1; - *--buffer = kDigitsLut[i + 1]; - *--buffer = kDigitsLut[i]; - } -} - -inline static void Int32ToStr(int32_t value, char* buffer) { - uint32_t u = static_cast(value); - if (value < 0) { - *buffer++ = '-'; - u = ~u + 1; - } - Uint32ToStr(u, buffer); -} - -inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) { - #ifdef _MSC_VER - int num_chars = sprintf_s(buffer, buffer_len, "%.17g", value); - #else - int num_chars = snprintf(buffer, buffer_len, "%.17g", value); - #endif - CHECK_GE(num_chars, 0); -} - inline static const char* SkipSpaceAndTab(const char* p) { while (*p == ' ' || *p == '\t') { ++p; @@ -440,67 +362,6 @@ inline static std::vector ArrayCast(const std::vector& arr) { return ret; } -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t) const { - Int32ToStr(value, buffer); - } -}; - -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t buf_len) - const { - #ifdef _MSC_VER - int num_chars = sprintf_s(buffer, buf_len, "%g", value); - #else - int num_chars = snprintf(buffer, buf_len, "%g", value); - #endif - CHECK_GE(num_chars, 0); - } -}; - -template -struct __TToStringHelperFast { - void operator()(T value, char* buffer, size_t) const { - Uint32ToStr(value, buffer); - } -}; - -template -inline static std::string ArrayToStringFast(const std::vector& arr, size_t n) { - if (arr.empty() || n == 0) { - return std::string(""); - } - __TToStringHelperFast::value, std::is_unsigned::value> helper; - const size_t buf_len = 16; - std::vector buffer(buf_len); - std::stringstream str_buf; - helper(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - helper(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); - } - return str_buf.str(); -} - -inline static std::string ArrayToString(const std::vector& arr, size_t n) { - if (arr.empty() || n == 0) { - return std::string(""); - } - const size_t buf_len = 32; - std::vector buffer(buf_len); - std::stringstream str_buf; - DoubleToStr(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - DoubleToStr(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); - } - return str_buf.str(); -} - template struct __StringToTHelper { T operator()(const std::string& str) const { @@ -588,11 +449,14 @@ inline static std::vector StringToArrayFast(const std::string& str, int n) { } template -inline static std::string Join(const std::vector& strs, const char* delimiter) { +inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale=false) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[0]; for (size_t i = 1; i < strs.size(); ++i) { @@ -603,11 +467,14 @@ inline static std::string Join(const std::vector& strs, const char* delimiter } template<> -inline std::string Join(const std::vector& strs, const char* delimiter) { +inline std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << static_cast(strs[0]); for (size_t i = 1; i < strs.size(); ++i) { @@ -618,13 +485,16 @@ inline std::string Join(const std::vector& strs, const char* del } template -inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { +inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter, const bool force_C_locale=false) { if (end - start <= 0) { return std::string(""); } start = std::min(start, static_cast(strs.size()) - 1); end = std::min(end, static_cast(strs.size())); std::stringstream str_buf; + if (force_C_locale) { + C_stringstream(str_buf); + } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[start]; for (size_t i = start + 1; i < end; ++i) { @@ -1137,6 +1007,213 @@ class FunctionTimer { extern Common::Timer global_timer; + +/*! + * Provides locale-independent alternatives to Common's methods. + * Essential to make models robust to locale settings. + */ +namespace CommonC { + + template + inline static std::string Join(const std::vector& strs, const char* delimiter) { + return LightGBM::Common::Join(strs, delimiter, true); + } + + template + inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { + return LightGBM::Common::Join(strs, start, end, delimiter, true); + } + + inline static const char* Atof(const char* p, double* out) { + return LightGBM::Common::Atof(p, out); + } + + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + return LightGBM::Common::Atoi(p, out); + } + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + */ + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + double tmp = 0.0f; + auto ret = Atof(p, &tmp); + *out = static_cast(tmp); + return ret; + } + }; + + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + T ret = 0; + LightGBM::Common::Atoi(str.c_str(), &ret); + return ret; + } + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. + */ + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + double tmp; + + // Fast (common) path: For numeric inputs in RFC 7159 format: + const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); + + // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: + if (!fast_parse_succeeded) { + std::stringstream ss; + Common::C_stringstream(ss); + ss << str; + ss >> tmp; + } + + return static_cast(tmp); + } + }; + + /*! + * Safely formats a value onto a buffer according to a format string and null-terminates it. + * + * \note It checks that the full value was written or forcefully aborts. + * This safety check serves to prevent incorrect internal API usage. + * Correct usage will never incur in this problem: + * - The received buffer size shall be sufficient at all times for the input format string and value. + */ + template + inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { + auto result = fmt::format_to_n(buffer, buf_len, format, value); + if (result.size >= buf_len) { + Log::Fatal("Numerical conversion failed. Buffer is too small."); + } + buffer[result.size] = '\0'; + } + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{}", value); + } + }; + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:g}", value); + } + }; + + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:.17g}", value); + } + }; + + /*! + * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, + * this method has less precision for floating point numbers than ``StringToArray``, + * which calls ``__StringToTHelper``. + * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! + * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + */ + template + inline static std::vector StringToArrayFast(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + auto p_str = str.c_str(); + __StringToTHelperFast::value> helper; + std::vector ret(n); + for (int i = 0; i < n; ++i) { + p_str = helper(p_str, &ret[i]); + } + return ret; + } + + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. + */ + template + inline static std::vector StringToArray(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); + CHECK_EQ(strs.size(), static_cast(n)); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; + } + + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. + */ + template + inline static std::vector StringToArray(const std::string& str, char delimiter) { + std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; + } + + /*! + * Converts an array to a string with with values separated by the space character. + * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality + * and is locale-independent. + * + * \note If ``high_precision_output`` is set to true, + * floating point values are output with more digits of precision. + */ + template + inline static std::string ArrayToString(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + __TToStringHelper::value, high_precision_output> helper; + const size_t buf_len = high_precision_output ? 32 : 16; + std::vector buffer(buf_len); + std::stringstream str_buf; + Common::C_stringstream(str_buf); + helper(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + helper(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); + } + +} // Namespace CommonC + + } // namespace LightGBM #endif // LightGBM_UTILS_COMMON_FUN_H_ diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 4eeb731f587f..e5cec8b61db0 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -20,6 +20,7 @@ const char* kModelVersion = "v3"; std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_importance_type) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "{"; str_buf << "\"name\":\"" << SubModelName() << "\"," << '\n'; @@ -34,16 +35,17 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ str_buf << "\"average_output\":" << (average_output_ ? "true" : "false") << ",\n"; - str_buf << "\"feature_names\":[\"" << Common::Join(feature_names_, "\",\"") + str_buf << "\"feature_names\":[\"" << CommonC::Join(feature_names_, "\",\"") << "\"]," << '\n'; str_buf << "\"monotone_constraints\":[" - << Common::Join(monotone_constraints_, ",") << "]," << '\n'; + << CommonC::Join(monotone_constraints_, ",") << "]," << '\n'; str_buf << "\"feature_infos\":" << "{"; bool first_obj = true; for (size_t i = 0; i < feature_infos_.size(); ++i) { std::stringstream json_str_buf; + Common::C_stringstream(json_str_buf); auto strs = Common::Split(feature_infos_[i].c_str(), ":"); if (strs[0][0] == '[') { strs[0].erase(0, 1); // remove '[' @@ -56,12 +58,12 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ json_str_buf << "\"max_value\":" << Common::AvoidInf(max_) << ","; json_str_buf << "\"values\":[]}"; } else if (strs[0] != "none") { // categorical feature - auto vals = Common::StringToArray(feature_infos_[i], ':'); + auto vals = CommonC::StringToArray(feature_infos_[i], ':'); auto max_idx = ArrayArgs::ArgMax(vals); auto min_idx = ArrayArgs::ArgMin(vals); json_str_buf << "{\"min_value\":" << vals[min_idx] << ","; json_str_buf << "\"max_value\":" << vals[max_idx] << ","; - json_str_buf << "\"values\":[" << Common::Join(vals, ",") << "]}"; + json_str_buf << "\"values\":[" << CommonC::Join(vals, ",") << "]}"; } else { // unused feature continue; } @@ -121,6 +123,7 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_ std::string GBDT::ModelToIfElse(int num_iteration) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "#include \"gbdt.h\"" << '\n'; str_buf << "#include " << '\n'; @@ -155,6 +158,7 @@ std::string GBDT::ModelToIfElse(int num_iteration) const { str_buf << " };" << '\n' << '\n'; std::stringstream pred_str_buf; + Common::C_stringstream(pred_str_buf); pred_str_buf << "\t" << "int early_stop_round_counter = 0;" << '\n'; pred_str_buf << "\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n'; @@ -186,6 +190,7 @@ std::string GBDT::ModelToIfElse(int num_iteration) const { str_buf << " };" << '\n' << '\n'; std::stringstream pred_str_buf_map; + Common::C_stringstream(pred_str_buf_map); pred_str_buf_map << "\t" << "int early_stop_round_counter = 0;" << '\n'; pred_str_buf_map << "\t" << "std::memset(output, 0, sizeof(double) * num_tree_per_iteration_);" << '\n'; @@ -305,6 +310,7 @@ bool GBDT::SaveModelToIfElse(int num_iteration, const char* filename) const { std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int feature_importance_type) const { std::stringstream ss; + Common::C_stringstream(ss); // output model type ss << SubModelName() << '\n'; @@ -325,14 +331,14 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int ss << "average_output" << '\n'; } - ss << "feature_names=" << Common::Join(feature_names_, " ") << '\n'; + ss << "feature_names=" << CommonC::Join(feature_names_, " ") << '\n'; if (monotone_constraints_.size() != 0) { - ss << "monotone_constraints=" << Common::Join(monotone_constraints_, " ") + ss << "monotone_constraints=" << CommonC::Join(monotone_constraints_, " ") << '\n'; } - ss << "feature_infos=" << Common::Join(feature_infos_, " ") << '\n'; + ss << "feature_infos=" << CommonC::Join(feature_infos_, " ") << '\n'; int num_used_model = static_cast(models_.size()); int total_iteration = num_used_model / num_tree_per_iteration_; @@ -356,7 +362,7 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int tree_sizes[idx] = tree_strs[idx].size(); } - ss << "tree_sizes=" << Common::Join(tree_sizes, " ") << '\n'; + ss << "tree_sizes=" << CommonC::Join(tree_sizes, " ") << '\n'; ss << '\n'; for (int i = 0; i < num_used_model - start_model; ++i) { @@ -491,7 +497,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { // get monotone_constraints if (key_vals.count("monotone_constraints")) { - monotone_constraints_ = Common::StringToArray(key_vals["monotone_constraints"].c_str(), ' '); + monotone_constraints_ = CommonC::StringToArray(key_vals["monotone_constraints"].c_str(), ' '); if (monotone_constraints_.size() != static_cast(max_feature_idx_ + 1)) { Log::Fatal("Wrong size of monotone_constraints"); return false; @@ -533,7 +539,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { p = Common::SkipNewLine(p); } } else { - std::vector tree_sizes = Common::StringToArray(key_vals["tree_sizes"].c_str(), ' '); + std::vector tree_sizes = CommonC::StringToArray(key_vals["tree_sizes"].c_str(), ' '); std::vector tree_boundries(tree_sizes.size() + 1, 0); int num_trees = static_cast(tree_sizes.size()); for (int i = 0; i < num_trees; ++i) { @@ -564,6 +570,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { iter_ = 0; bool is_inparameter = false; std::stringstream ss; + Common::C_stringstream(ss); while (p < end) { auto line_len = Common::GetLine(p); if (line_len > 0) { diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 92f016dfe6b0..a66765e2d638 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -219,37 +219,39 @@ double Tree::GetLowerBoundValue() const { std::string Tree::ToString() const { std::stringstream str_buf; + Common::C_stringstream(str_buf); + str_buf << "num_leaves=" << num_leaves_ << '\n'; str_buf << "num_cat=" << num_cat_ << '\n'; str_buf << "split_feature=" - << Common::ArrayToStringFast(split_feature_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(split_feature_, num_leaves_ - 1) << '\n'; str_buf << "split_gain=" - << Common::ArrayToStringFast(split_gain_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(split_gain_, num_leaves_ - 1) << '\n'; str_buf << "threshold=" - << Common::ArrayToString(threshold_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(threshold_, num_leaves_ - 1) << '\n'; str_buf << "decision_type=" - << Common::ArrayToStringFast(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; str_buf << "left_child=" - << Common::ArrayToStringFast(left_child_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(left_child_, num_leaves_ - 1) << '\n'; str_buf << "right_child=" - << Common::ArrayToStringFast(right_child_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(right_child_, num_leaves_ - 1) << '\n'; str_buf << "leaf_value=" - << Common::ArrayToString(leaf_value_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_value_, num_leaves_) << '\n'; str_buf << "leaf_weight=" - << Common::ArrayToString(leaf_weight_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_weight_, num_leaves_) << '\n'; str_buf << "leaf_count=" - << Common::ArrayToStringFast(leaf_count_, num_leaves_) << '\n'; + << CommonC::ArrayToString(leaf_count_, num_leaves_) << '\n'; str_buf << "internal_value=" - << Common::ArrayToStringFast(internal_value_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_value_, num_leaves_ - 1) << '\n'; str_buf << "internal_weight=" - << Common::ArrayToStringFast(internal_weight_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_weight_, num_leaves_ - 1) << '\n'; str_buf << "internal_count=" - << Common::ArrayToStringFast(internal_count_, num_leaves_ - 1) << '\n'; + << CommonC::ArrayToString(internal_count_, num_leaves_ - 1) << '\n'; if (num_cat_ > 0) { str_buf << "cat_boundaries=" - << Common::ArrayToStringFast(cat_boundaries_, num_cat_ + 1) << '\n'; + << CommonC::ArrayToString(cat_boundaries_, num_cat_ + 1) << '\n'; str_buf << "cat_threshold=" - << Common::ArrayToStringFast(cat_threshold_, cat_threshold_.size()) << '\n'; + << CommonC::ArrayToString(cat_threshold_, cat_threshold_.size()) << '\n'; } str_buf << "shrinkage=" << shrinkage_ << '\n'; str_buf << '\n'; @@ -258,6 +260,7 @@ std::string Tree::ToString() const { std::string Tree::ToJSON() const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << "\"num_leaves\":" << num_leaves_ << "," << '\n'; str_buf << "\"num_cat\":" << num_cat_ << "," << '\n'; @@ -273,6 +276,7 @@ std::string Tree::ToJSON() const { std::string Tree::NodeToJSON(int index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -292,7 +296,7 @@ std::string Tree::NodeToJSON(int index) const { } } } - str_buf << "\"threshold\":\"" << Common::Join(cats, "||") << "\"," << '\n'; + str_buf << "\"threshold\":\"" << CommonC::Join(cats, "||") << "\"," << '\n'; str_buf << "\"decision_type\":\"==\"," << '\n'; } else { str_buf << "\"threshold\":" << Common::AvoidInf(threshold_[index]) << "," << '\n'; @@ -333,6 +337,7 @@ std::string Tree::NodeToJSON(int index) const { std::string Tree::NumericalDecisionIfElse(int node) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); uint8_t missing_type = GetMissingType(decision_type_[node]); bool default_left = GetDecisionType(decision_type_[node], kDefaultLeftMask); if (missing_type == MissingType::None @@ -357,6 +362,7 @@ std::string Tree::NumericalDecisionIfElse(int node) const { std::string Tree::CategoricalDecisionIfElse(int node) const { uint8_t missing_type = GetMissingType(decision_type_[node]); std::stringstream str_buf; + Common::C_stringstream(str_buf); if (missing_type == MissingType::NaN) { str_buf << "if (std::isnan(fval)) { int_fval = -1; } else { int_fval = static_cast(fval); }"; } else { @@ -372,6 +378,7 @@ std::string Tree::CategoricalDecisionIfElse(int node) const { std::string Tree::ToIfElse(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << "double PredictTree" << index; if (predict_leaf_index) { str_buf << "Leaf"; @@ -430,6 +437,7 @@ std::string Tree::ToIfElse(int index, bool predict_leaf_index) const { std::string Tree::NodeToIfElse(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -461,6 +469,7 @@ std::string Tree::NodeToIfElse(int index, bool predict_leaf_index) const { std::string Tree::NodeToIfElseByMap(int index, bool predict_leaf_index) const { std::stringstream str_buf; + Common::C_stringstream(str_buf); str_buf << std::setprecision(std::numeric_limits::digits10 + 2); if (index >= 0) { // non-leaf @@ -523,13 +532,13 @@ Tree::Tree(const char* str, size_t* used_len) { Common::Atoi(key_vals["num_cat"].c_str(), &num_cat_); if (key_vals.count("leaf_value")) { - leaf_value_ = Common::StringToArray(key_vals["leaf_value"], num_leaves_); + leaf_value_ = CommonC::StringToArray(key_vals["leaf_value"], num_leaves_); } else { Log::Fatal("Tree model string format error, should contain leaf_value field"); } if (key_vals.count("shrinkage")) { - Common::Atof(key_vals["shrinkage"].c_str(), &shrinkage_); + CommonC::Atof(key_vals["shrinkage"].c_str(), &shrinkage_); } else { shrinkage_ = 1.0f; } @@ -537,80 +546,80 @@ Tree::Tree(const char* str, size_t* used_len) { if (num_leaves_ <= 1) { return; } if (key_vals.count("left_child")) { - left_child_ = Common::StringToArrayFast(key_vals["left_child"], num_leaves_ - 1); + left_child_ = CommonC::StringToArrayFast(key_vals["left_child"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain left_child field"); } if (key_vals.count("right_child")) { - right_child_ = Common::StringToArrayFast(key_vals["right_child"], num_leaves_ - 1); + right_child_ = CommonC::StringToArrayFast(key_vals["right_child"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain right_child field"); } if (key_vals.count("split_feature")) { - split_feature_ = Common::StringToArrayFast(key_vals["split_feature"], num_leaves_ - 1); + split_feature_ = CommonC::StringToArrayFast(key_vals["split_feature"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain split_feature field"); } if (key_vals.count("threshold")) { - threshold_ = Common::StringToArray(key_vals["threshold"], num_leaves_ - 1); + threshold_ = CommonC::StringToArray(key_vals["threshold"], num_leaves_ - 1); } else { Log::Fatal("Tree model string format error, should contain threshold field"); } if (key_vals.count("split_gain")) { - split_gain_ = Common::StringToArrayFast(key_vals["split_gain"], num_leaves_ - 1); + split_gain_ = CommonC::StringToArrayFast(key_vals["split_gain"], num_leaves_ - 1); } else { split_gain_.resize(num_leaves_ - 1); } if (key_vals.count("internal_count")) { - internal_count_ = Common::StringToArrayFast(key_vals["internal_count"], num_leaves_ - 1); + internal_count_ = CommonC::StringToArrayFast(key_vals["internal_count"], num_leaves_ - 1); } else { internal_count_.resize(num_leaves_ - 1); } if (key_vals.count("internal_value")) { - internal_value_ = Common::StringToArrayFast(key_vals["internal_value"], num_leaves_ - 1); + internal_value_ = CommonC::StringToArrayFast(key_vals["internal_value"], num_leaves_ - 1); } else { internal_value_.resize(num_leaves_ - 1); } if (key_vals.count("internal_weight")) { - internal_weight_ = Common::StringToArrayFast(key_vals["internal_weight"], num_leaves_ - 1); + internal_weight_ = CommonC::StringToArrayFast(key_vals["internal_weight"], num_leaves_ - 1); } else { internal_weight_.resize(num_leaves_ - 1); } if (key_vals.count("leaf_weight")) { - leaf_weight_ = Common::StringToArray(key_vals["leaf_weight"], num_leaves_); + leaf_weight_ = CommonC::StringToArray(key_vals["leaf_weight"], num_leaves_); } else { leaf_weight_.resize(num_leaves_); } if (key_vals.count("leaf_count")) { - leaf_count_ = Common::StringToArrayFast(key_vals["leaf_count"], num_leaves_); + leaf_count_ = CommonC::StringToArrayFast(key_vals["leaf_count"], num_leaves_); } else { leaf_count_.resize(num_leaves_); } if (key_vals.count("decision_type")) { - decision_type_ = Common::StringToArrayFast(key_vals["decision_type"], num_leaves_ - 1); + decision_type_ = CommonC::StringToArrayFast(key_vals["decision_type"], num_leaves_ - 1); } else { decision_type_ = std::vector(num_leaves_ - 1, 0); } if (num_cat_ > 0) { if (key_vals.count("cat_boundaries")) { - cat_boundaries_ = Common::StringToArrayFast(key_vals["cat_boundaries"], num_cat_ + 1); + cat_boundaries_ = CommonC::StringToArrayFast(key_vals["cat_boundaries"], num_cat_ + 1); } else { Log::Fatal("Tree model should contain cat_boundaries field."); } if (key_vals.count("cat_threshold")) { - cat_threshold_ = Common::StringToArrayFast(key_vals["cat_threshold"], cat_boundaries_.back()); + cat_threshold_ = CommonC::StringToArrayFast(key_vals["cat_threshold"], cat_boundaries_.back()); } else { Log::Fatal("Tree model should contain cat_threshold field"); } From 538e102291609c6ddd59a779179edced1b8ce406 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Thu, 24 Sep 2020 10:44:27 +0100 Subject: [PATCH 02/33] Align CommonC namespace --- include/LightGBM/utils/common.h | 364 ++++++++++++++++---------------- 1 file changed, 182 insertions(+), 182 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index cc3e7c9e9997..1db8a577d2de 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -1008,210 +1008,210 @@ class FunctionTimer { extern Common::Timer global_timer; -/*! - * Provides locale-independent alternatives to Common's methods. - * Essential to make models robust to locale settings. - */ -namespace CommonC { - - template - inline static std::string Join(const std::vector& strs, const char* delimiter) { - return LightGBM::Common::Join(strs, delimiter, true); - } - - template - inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { - return LightGBM::Common::Join(strs, start, end, delimiter, true); - } - - inline static const char* Atof(const char* p, double* out) { - return LightGBM::Common::Atof(p, out); - } + /*! + * Provides locale-independent alternatives to Common's methods. + * Essential to make models robust to locale settings. + */ + namespace CommonC { - template - struct __StringToTHelperFast { - const char* operator()(const char*p, T* out) const { - return LightGBM::Common::Atoi(p, out); + template + inline static std::string Join(const std::vector& strs, const char* delimiter) { + return LightGBM::Common::Join(strs, delimiter, true); } - }; - /*! - * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, - * has **less** floating point precision than ``__StringToTHelper``. - * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. - */ - template - struct __StringToTHelperFast { - const char* operator()(const char*p, T* out) const { - double tmp = 0.0f; - auto ret = Atof(p, &tmp); - *out = static_cast(tmp); - return ret; + template + inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { + return LightGBM::Common::Join(strs, start, end, delimiter, true); } - }; - template - struct __StringToTHelper { - T operator()(const std::string& str) const { - T ret = 0; - LightGBM::Common::Atoi(str.c_str(), &ret); - return ret; + inline static const char* Atof(const char* p, double* out) { + return LightGBM::Common::Atof(p, out); } - }; - /*! - * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, - * has **less** floating point precision than ``__StringToTHelper``. - * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. - * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. - */ - template - struct __StringToTHelper { - T operator()(const std::string& str) const { - double tmp; - - // Fast (common) path: For numeric inputs in RFC 7159 format: - const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); - - // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: - if (!fast_parse_succeeded) { - std::stringstream ss; - Common::C_stringstream(ss); - ss << str; - ss >> tmp; + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + return LightGBM::Common::Atoi(p, out); } - - return static_cast(tmp); - } - }; - - /*! - * Safely formats a value onto a buffer according to a format string and null-terminates it. - * - * \note It checks that the full value was written or forcefully aborts. - * This safety check serves to prevent incorrect internal API usage. - * Correct usage will never incur in this problem: - * - The received buffer size shall be sufficient at all times for the input format string and value. - */ - template - inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { - auto result = fmt::format_to_n(buffer, buf_len, format, value); - if (result.size >= buf_len) { - Log::Fatal("Numerical conversion failed. Buffer is too small."); + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + */ + template + struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + double tmp = 0.0f; + auto ret = Atof(p, &tmp); + *out = static_cast(tmp); + return ret; } - buffer[result.size] = '\0'; - } + }; + + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + T ret = 0; + LightGBM::Common::Atoi(str.c_str(), &ret); + return ret; + } + }; + + /*! + * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, + * has **less** floating point precision than ``__StringToTHelper``. + * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. + * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. + */ + template + struct __StringToTHelper { + T operator()(const std::string& str) const { + double tmp; + + // Fast (common) path: For numeric inputs in RFC 7159 format: + const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); + + // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: + if (!fast_parse_succeeded) { + std::stringstream ss; + Common::C_stringstream(ss); + ss << str; + ss >> tmp; + } - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{}", value); + return static_cast(tmp); + } + }; + + /*! + * Safely formats a value onto a buffer according to a format string and null-terminates it. + * + * \note It checks that the full value was written or forcefully aborts. + * This safety check serves to prevent incorrect internal API usage. + * Correct usage will never incur in this problem: + * - The received buffer size shall be sufficient at all times for the input format string and value. + */ + template + inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { + auto result = fmt::format_to_n(buffer, buf_len, format, value); + if (result.size >= buf_len) { + Log::Fatal("Numerical conversion failed. Buffer is too small."); + } + buffer[result.size] = '\0'; } - }; - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:g}", value); - } - }; + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{}", value); + } + }; - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:.17g}", value); - } - }; + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:g}", value); + } + }; - /*! - * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, - * this method has less precision for floating point numbers than ``StringToArray``, - * which calls ``__StringToTHelper``. - * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! - * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - */ - template - inline static std::vector StringToArrayFast(const std::string& str, int n) { - if (n == 0) { - return std::vector(); - } - auto p_str = str.c_str(); - __StringToTHelperFast::value> helper; - std::vector ret(n); - for (int i = 0; i < n; ++i) { - p_str = helper(p_str, &ret[i]); + template + struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:.17g}", value); + } + }; + + /*! + * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, + * this method has less precision for floating point numbers than ``StringToArray``, + * which calls ``__StringToTHelper``. + * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! + * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. + */ + template + inline static std::vector StringToArrayFast(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + auto p_str = str.c_str(); + __StringToTHelperFast::value> helper; + std::vector ret(n); + for (int i = 0; i < n; ++i) { + p_str = helper(p_str, &ret[i]); + } + return ret; } - return ret; - } - /*! - * \warning Do not replace calls to this method by ``StringToArrayFast``. - * This method is more precise for floating point numbers. - * Check ``StringToArrayFast`` for more details. - */ - template - inline static std::vector StringToArray(const std::string& str, int n) { - if (n == 0) { - return std::vector(); - } - std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); - CHECK_EQ(strs.size(), static_cast(n)); - std::vector ret; - ret.reserve(strs.size()); - __StringToTHelper::value> helper; - for (const auto& s : strs) { - ret.push_back(helper(s)); + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. + */ + template + inline static std::vector StringToArray(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); + CHECK_EQ(strs.size(), static_cast(n)); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; } - return ret; - } - /*! - * \warning Do not replace calls to this method by ``StringToArrayFast``. - * This method is more precise for floating point numbers. - * Check ``StringToArrayFast`` for more details. - */ - template - inline static std::vector StringToArray(const std::string& str, char delimiter) { - std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); - std::vector ret; - ret.reserve(strs.size()); - __StringToTHelper::value> helper; - for (const auto& s : strs) { - ret.push_back(helper(s)); + /*! + * \warning Do not replace calls to this method by ``StringToArrayFast``. + * This method is more precise for floating point numbers. + * Check ``StringToArrayFast`` for more details. + */ + template + inline static std::vector StringToArray(const std::string& str, char delimiter) { + std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; } - return ret; - } - /*! - * Converts an array to a string with with values separated by the space character. - * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality - * and is locale-independent. - * - * \note If ``high_precision_output`` is set to true, - * floating point values are output with more digits of precision. - */ - template - inline static std::string ArrayToString(const std::vector& arr, size_t n) { - if (arr.empty() || n == 0) { - return std::string(""); - } - __TToStringHelper::value, high_precision_output> helper; - const size_t buf_len = high_precision_output ? 32 : 16; - std::vector buffer(buf_len); - std::stringstream str_buf; - Common::C_stringstream(str_buf); - helper(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - helper(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); + /*! + * Converts an array to a string with with values separated by the space character. + * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality + * and is locale-independent. + * + * \note If ``high_precision_output`` is set to true, + * floating point values are output with more digits of precision. + */ + template + inline static std::string ArrayToString(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + __TToStringHelper::value, high_precision_output> helper; + const size_t buf_len = high_precision_output ? 32 : 16; + std::vector buffer(buf_len); + std::stringstream str_buf; + Common::C_stringstream(str_buf); + helper(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + helper(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); } - return str_buf.str(); - } -} // Namespace CommonC + } // Namespace CommonC } // namespace LightGBM From bfcdb89fe11b5e6ddb74b96ac7ddfddc923c0cfe Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Thu, 24 Sep 2020 18:32:53 +0100 Subject: [PATCH 03/33] Add new external_libs/ to python setup --- python-package/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-package/setup.py b/python-package/setup.py index 4e3242fe2191..61a69c42c299 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -43,6 +43,7 @@ def copy_files_helper(folder_name): if not os.path.isfile(os.path.join(CURRENT_DIR, '_IS_SOURCE_PACKAGE.txt')): copy_files_helper('include') copy_files_helper('src') + copy_files_helper('external_libs') if not os.path.exists(os.path.join(CURRENT_DIR, "compile", "windows")): os.makedirs(os.path.join(CURRENT_DIR, "compile", "windows")) copy_file(os.path.join(CURRENT_DIR, os.path.pardir, "windows", "LightGBM.sln"), From 30ae0c0439bd43c9c20614f7180eaffa0c7b4f8d Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Tue, 29 Sep 2020 17:46:55 +0100 Subject: [PATCH 04/33] Try fast_double_parser fix #1 Testing commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe If it works it should fix more LGBM builds --- external_libs/fast_double_parser | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external_libs/fast_double_parser b/external_libs/fast_double_parser index 98c751de3e26..e09e5aad828b 160000 --- a/external_libs/fast_double_parser +++ b/external_libs/fast_double_parser @@ -1 +1 @@ -Subproject commit 98c751de3e2681f9baf4e95b3956e0723cbdf5ed +Subproject commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe From 3a97aba0c4abed202b00a94f42c0fdb9808bb6e3 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Wed, 30 Sep 2020 11:48:02 +0100 Subject: [PATCH 05/33] CMake: Attempt to link fmt without explicit PUBLIC tag --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 42f93ae04db1..3c5fe0cc8b28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -452,8 +452,8 @@ endif(__BUILD_FOR_R) # fmtlib/fmt add_subdirectory(external_libs/fmt) -TARGET_LINK_LIBRARIES(lightgbm PUBLIC fmt::fmt) -TARGET_LINK_LIBRARIES(_lightgbm PUBLIC fmt::fmt) +TARGET_LINK_LIBRARIES(lightgbm fmt::fmt) +TARGET_LINK_LIBRARIES(_lightgbm fmt::fmt) install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin From 447050e03f5c63b206dd171aca7052874e95244b Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Wed, 30 Sep 2020 12:07:50 +0100 Subject: [PATCH 06/33] Exclude external_libs from linting --- .ci/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index 4224864a4cae..9da8b48f4ddb 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -52,8 +52,8 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then "r-lintr>=2.0" pip install --user cpplint echo "Linting Python code" - pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1 - pydocstyle --convention=numpy --add-ignore=D105 --match-dir="^(?!^compute|test|example).*" --match="(?!^test_|setup).*\.py" . || exit -1 + pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget,./external_libs . || exit -1 + pydocstyle --convention=numpy --add-ignore=D105 --match-dir="^(?!^compute|external_libs|test|example).*" --match="(?!^test_|setup).*\.py" . || exit -1 echo "Linting R code" Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit -1 echo "Linting C++ code" From 37c0cd486efb381f863a774edb593901f9ccc33a Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 12:54:50 +0100 Subject: [PATCH 07/33] Add exernal_libs to MANIFEST.in --- python-package/MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in index 8104cdbe93e9..2732a9962e14 100644 --- a/python-package/MANIFEST.in +++ b/python-package/MANIFEST.in @@ -8,6 +8,7 @@ recursive-include compile/compute/ *.txt recursive-include compile/compute/cmake * recursive-include compile/compute/include * recursive-include compile/compute/meta * +recursive-include compile/external_libs * recursive-include compile/include * recursive-include compile/src * recursive-include compile/windows LightGBM.sln LightGBM.vcxproj From 6dcc5d0b6f104a665f70fa347f2d3379e404db86 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 15:04:10 +0100 Subject: [PATCH 08/33] Set dynamic linking option for fmt. --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c5fe0cc8b28..6f6df282df1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -451,6 +451,9 @@ if(__BUILD_FOR_R) endif(__BUILD_FOR_R) # fmtlib/fmt +if (NOT BUILD_STATIC_LIB) + OPTION(BUILD_SHARED_LIBS "Build shared libs" ON) # fmt reads this flag. +endif(NOT BUILD_STATIC_LIB) add_subdirectory(external_libs/fmt) TARGET_LINK_LIBRARIES(lightgbm fmt::fmt) TARGET_LINK_LIBRARIES(_lightgbm fmt::fmt) From c7c1977fa5dc42adcd1a2ab2b80e8b285bfee891 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 15:29:30 +0100 Subject: [PATCH 09/33] linting issues --- include/LightGBM/utils/common.h | 389 ++++++++++++++++---------------- src/io/tree.cpp | 2 +- 2 files changed, 196 insertions(+), 195 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 1db8a577d2de..d0b5313ae7c8 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -5,11 +5,12 @@ #ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ #define LIGHTGBM_UTILS_COMMON_FUN_H_ -#include "../../../external_libs/fmt/include/fmt/format.h" -#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" #include #include +#include "../../../external_libs/fmt/include/fmt/format.h" +#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" + #include #include #include @@ -449,7 +450,7 @@ inline static std::vector StringToArrayFast(const std::string& str, int n) { } template -inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale=false) { +inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale = false) { if (strs.empty()) { return std::string(""); } @@ -485,7 +486,7 @@ inline std::string Join(const std::vector& strs, const char* del } template -inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter, const bool force_C_locale=false) { +inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter, const bool force_C_locale = false) { if (end - start <= 0) { return std::string(""); } @@ -1008,210 +1009,210 @@ class FunctionTimer { extern Common::Timer global_timer; - /*! - * Provides locale-independent alternatives to Common's methods. - * Essential to make models robust to locale settings. - */ - namespace CommonC { +/*! +* Provides locale-independent alternatives to Common's methods. +* Essential to make models robust to locale settings. +*/ +namespace CommonC { - template - inline static std::string Join(const std::vector& strs, const char* delimiter) { - return LightGBM::Common::Join(strs, delimiter, true); - } +template +inline static std::string Join(const std::vector& strs, const char* delimiter) { + return LightGBM::Common::Join(strs, delimiter, true); +} - template - inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { - return LightGBM::Common::Join(strs, start, end, delimiter, true); - } +template +inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { + return LightGBM::Common::Join(strs, start, end, delimiter, true); +} - inline static const char* Atof(const char* p, double* out) { - return LightGBM::Common::Atof(p, out); - } +inline static const char* Atof(const char* p, double* out) { + return LightGBM::Common::Atof(p, out); +} - template - struct __StringToTHelperFast { - const char* operator()(const char*p, T* out) const { - return LightGBM::Common::Atoi(p, out); - } - }; - - /*! - * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, - * has **less** floating point precision than ``__StringToTHelper``. - * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. - */ - template - struct __StringToTHelperFast { - const char* operator()(const char*p, T* out) const { - double tmp = 0.0f; - auto ret = Atof(p, &tmp); - *out = static_cast(tmp); - return ret; - } - }; - - template - struct __StringToTHelper { - T operator()(const std::string& str) const { - T ret = 0; - LightGBM::Common::Atoi(str.c_str(), &ret); - return ret; - } - }; - - /*! - * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, - * has **less** floating point precision than ``__StringToTHelper``. - * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. - * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. - */ - template - struct __StringToTHelper { - T operator()(const std::string& str) const { - double tmp; - - // Fast (common) path: For numeric inputs in RFC 7159 format: - const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); - - // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: - if (!fast_parse_succeeded) { - std::stringstream ss; - Common::C_stringstream(ss); - ss << str; - ss >> tmp; - } +template +struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + return LightGBM::Common::Atoi(p, out); + } +}; - return static_cast(tmp); - } - }; - - /*! - * Safely formats a value onto a buffer according to a format string and null-terminates it. - * - * \note It checks that the full value was written or forcefully aborts. - * This safety check serves to prevent incorrect internal API usage. - * Correct usage will never incur in this problem: - * - The received buffer size shall be sufficient at all times for the input format string and value. - */ - template - inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { - auto result = fmt::format_to_n(buffer, buf_len, format, value); - if (result.size >= buf_len) { - Log::Fatal("Numerical conversion failed. Buffer is too small."); - } - buffer[result.size] = '\0'; - } +/*! +* \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, +* has **less** floating point precision than ``__StringToTHelper``. +* Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. +* Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. +*/ +template +struct __StringToTHelperFast { + const char* operator()(const char*p, T* out) const { + double tmp = 0.0f; + auto ret = Atof(p, &tmp); + *out = static_cast(tmp); + return ret; + } +}; - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{}", value); - } - }; +template +struct __StringToTHelper { + T operator()(const std::string& str) const { + T ret = 0; + LightGBM::Common::Atoi(str.c_str(), &ret); + return ret; + } +}; - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:g}", value); - } - }; +/*! +* \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, +* has **less** floating point precision than ``__StringToTHelper``. +* Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. +* Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. +* \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. +*/ +template +struct __StringToTHelper { + T operator()(const std::string& str) const { + double tmp; - template - struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:.17g}", value); - } - }; - - /*! - * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, - * this method has less precision for floating point numbers than ``StringToArray``, - * which calls ``__StringToTHelper``. - * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! - * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. - */ - template - inline static std::vector StringToArrayFast(const std::string& str, int n) { - if (n == 0) { - return std::vector(); - } - auto p_str = str.c_str(); - __StringToTHelperFast::value> helper; - std::vector ret(n); - for (int i = 0; i < n; ++i) { - p_str = helper(p_str, &ret[i]); - } - return ret; - } + // Fast (common) path: For numeric inputs in RFC 7159 format: + const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); - /*! - * \warning Do not replace calls to this method by ``StringToArrayFast``. - * This method is more precise for floating point numbers. - * Check ``StringToArrayFast`` for more details. - */ - template - inline static std::vector StringToArray(const std::string& str, int n) { - if (n == 0) { - return std::vector(); - } - std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); - CHECK_EQ(strs.size(), static_cast(n)); - std::vector ret; - ret.reserve(strs.size()); - __StringToTHelper::value> helper; - for (const auto& s : strs) { - ret.push_back(helper(s)); - } - return ret; + // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: + if (!fast_parse_succeeded) { + std::stringstream ss; + Common::C_stringstream(ss); + ss << str; + ss >> tmp; } - /*! - * \warning Do not replace calls to this method by ``StringToArrayFast``. - * This method is more precise for floating point numbers. - * Check ``StringToArrayFast`` for more details. - */ - template - inline static std::vector StringToArray(const std::string& str, char delimiter) { - std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); - std::vector ret; - ret.reserve(strs.size()); - __StringToTHelper::value> helper; - for (const auto& s : strs) { - ret.push_back(helper(s)); - } - return ret; - } + return static_cast(tmp); + } +}; - /*! - * Converts an array to a string with with values separated by the space character. - * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality - * and is locale-independent. - * - * \note If ``high_precision_output`` is set to true, - * floating point values are output with more digits of precision. - */ - template - inline static std::string ArrayToString(const std::vector& arr, size_t n) { - if (arr.empty() || n == 0) { - return std::string(""); - } - __TToStringHelper::value, high_precision_output> helper; - const size_t buf_len = high_precision_output ? 32 : 16; - std::vector buffer(buf_len); - std::stringstream str_buf; - Common::C_stringstream(str_buf); - helper(arr[0], buffer.data(), buf_len); - str_buf << buffer.data(); - for (size_t i = 1; i < std::min(n, arr.size()); ++i) { - helper(arr[i], buffer.data(), buf_len); - str_buf << ' ' << buffer.data(); - } - return str_buf.str(); +/*! +* Safely formats a value onto a buffer according to a format string and null-terminates it. +* +* \note It checks that the full value was written or forcefully aborts. +* This safety check serves to prevent incorrect internal API usage. +* Correct usage will never incur in this problem: +* - The received buffer size shall be sufficient at all times for the input format string and value. +*/ +template +inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { + auto result = fmt::format_to_n(buffer, buf_len, format, value); + if (result.size >= buf_len) { + Log::Fatal("Numerical conversion failed. Buffer is too small."); } + buffer[result.size] = '\0'; +} + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{}", value); + } +}; + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:g}", value); + } +}; + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:.17g}", value); + } +}; + +/*! +* \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, +* this method has less precision for floating point numbers than ``StringToArray``, +* which calls ``__StringToTHelper``. +* As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! +* Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. +*/ +template +inline static std::vector StringToArrayFast(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + auto p_str = str.c_str(); + __StringToTHelperFast::value> helper; + std::vector ret(n); + for (int i = 0; i < n; ++i) { + p_str = helper(p_str, &ret[i]); + } + return ret; +} + +/*! +* \warning Do not replace calls to this method by ``StringToArrayFast``. +* This method is more precise for floating point numbers. +* Check ``StringToArrayFast`` for more details. +*/ +template +inline static std::vector StringToArray(const std::string& str, int n) { + if (n == 0) { + return std::vector(); + } + std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); + CHECK_EQ(strs.size(), static_cast(n)); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; +} + +/*! +* \warning Do not replace calls to this method by ``StringToArrayFast``. +* This method is more precise for floating point numbers. +* Check ``StringToArrayFast`` for more details. +*/ +template +inline static std::vector StringToArray(const std::string& str, char delimiter) { + std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); + std::vector ret; + ret.reserve(strs.size()); + __StringToTHelper::value> helper; + for (const auto& s : strs) { + ret.push_back(helper(s)); + } + return ret; +} + +/*! +* Converts an array to a string with with values separated by the space character. +* This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality +* and is locale-independent. +* +* \note If ``high_precision_output`` is set to true, +* floating point values are output with more digits of precision. +*/ +template +inline static std::string ArrayToString(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + __TToStringHelper::value, high_precision_output> helper; + const size_t buf_len = high_precision_output ? 32 : 16; + std::vector buffer(buf_len); + std::stringstream str_buf; + Common::C_stringstream(str_buf); + helper(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + helper(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); +} - } // Namespace CommonC +} // namespace CommonC } // namespace LightGBM diff --git a/src/io/tree.cpp b/src/io/tree.cpp index a66765e2d638..ccc997d2f5c4 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -220,7 +220,7 @@ double Tree::GetLowerBoundValue() const { std::string Tree::ToString() const { std::stringstream str_buf; Common::C_stringstream(str_buf); - + str_buf << "num_leaves=" << num_leaves_ << '\n'; str_buf << "num_cat=" << num_cat_ << '\n'; str_buf << "split_feature=" From a206715f8f4eeeb58635b493649a77c4e7d11c9d Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 18:56:55 +0100 Subject: [PATCH 10/33] Try to fix lint includes --- include/LightGBM/utils/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index d0b5313ae7c8..a959aabea8f4 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -8,9 +8,6 @@ #include #include -#include "../../../external_libs/fmt/include/fmt/format.h" -#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" - #include #include #include @@ -30,6 +27,9 @@ #include #include +#include "../../../external_libs/fmt/include/fmt/format.h" +#include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" + #ifdef _MSC_VER #include #pragma intrinsic(_BitScanReverse) From f8556ab5bcf6cd31af004c0fb2ed35ad96aaf3b3 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 19:00:26 +0100 Subject: [PATCH 11/33] Try to pass fPIC with static fmt lib --- CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f6df282df1c..1de3d0984d49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -451,9 +451,10 @@ if(__BUILD_FOR_R) endif(__BUILD_FOR_R) # fmtlib/fmt -if (NOT BUILD_STATIC_LIB) - OPTION(BUILD_SHARED_LIBS "Build shared libs" ON) # fmt reads this flag. -endif(NOT BUILD_STATIC_LIB) +#if (NOT BUILD_STATIC_LIB) +# OPTION(BUILD_SHARED_LIBS "Build shared libs" ON) # fmt reads this flag. +#endif(NOT BUILD_STATIC_LIB) +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops") add_subdirectory(external_libs/fmt) TARGET_LINK_LIBRARIES(lightgbm fmt::fmt) TARGET_LINK_LIBRARIES(_lightgbm fmt::fmt) From 18c67730f7a59f883eea667704607e227b333182 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Fri, 2 Oct 2020 19:04:39 +0100 Subject: [PATCH 12/33] Try CMake P_I_C option with fmt library --- CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1de3d0984d49..31d41d437911 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,10 +454,11 @@ endif(__BUILD_FOR_R) #if (NOT BUILD_STATIC_LIB) # OPTION(BUILD_SHARED_LIBS "Build shared libs" ON) # fmt reads this flag. #endif(NOT BUILD_STATIC_LIB) -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops") +#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops") add_subdirectory(external_libs/fmt) -TARGET_LINK_LIBRARIES(lightgbm fmt::fmt) -TARGET_LINK_LIBRARIES(_lightgbm fmt::fmt) +set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) +TARGET_LINK_LIBRARIES(lightgbm fmt) +TARGET_LINK_LIBRARIES(_lightgbm fmt) install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin From 8d6ec54b82ebf136b317c2a67e32485b5cf32b07 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 3 Oct 2020 22:24:05 -0500 Subject: [PATCH 13/33] [R-package] Add CMake support for R and CRAN --- R-package/.Rbuildignore | 21 +++++++++++++++++++++ R-package/src/Makevars.in | 2 ++ R-package/src/Makevars.win.in | 2 ++ build-cran-package.sh | 34 ++++++++++++++++++++++++++++++++++ build_r.R | 11 +++++++++++ 5 files changed, 70 insertions(+) diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index c231a58a126b..c02d584991bb 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -1,10 +1,14 @@ +\.appveyor\.yml AUTOCONF_UBUNTU_VERSION ^autom4te.cache/.*$ ^.*\.bin ^build_r.R$ +\.clang-format ^cran-comments\.md$ ^docs$ ^.*\.dll +\.drone\.yml +\.git \.gitkeep$ ^.*\.history ^Makefile$ @@ -24,3 +28,20 @@ AUTOCONF_UBUNTU_VERSION ^src/compute/.gitignore$ ^src/compute/CONTRIBUTING.md$ ^src/compute/README.md$ +src/external_libs/fast_double_parser/benchmarks +src/external_libs/fast_double_parser/Makefile +src/external_libs/fast_double_parser/.*\.md +src/external_libs/fast_double_parser/tests +src/external_libs/fmt/.*\.md +src/external_libs/fmt/doc +src/external_libs/fmt/support/Android\.mk +src/external_libs/fmt/support/.*\.gradle +src/external_libs/fmt/support/.*\.pro +src/external_libs/fmt/support/.*\.py +src/external_libs/fmt/support/rtd +src/external_libs/fmt/support/.*sublime-syntax +src/external_libs/fmt/support/Vagrantfile +src/external_libs/fmt/support/.*\.xml +src/external_libs/fmt/support/.*\.yml +src/external_libs/fmt/test +\.travis\.yml diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 0c580ebe36bf..bb1a2819ba87 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -26,6 +26,8 @@ OBJECTS = \ boosting/gbdt_model_text.o \ boosting/gbdt_prediction.o \ boosting/prediction_early_stop.o \ + external_libs/fmt/format.o \ + external_libs/fmt/os.o \ io/bin.o \ io/config.o \ io/config_auto.o \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index 952508e16a7d..4aba8b0a28c6 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -27,6 +27,8 @@ OBJECTS = \ boosting/gbdt_model_text.o \ boosting/gbdt_prediction.o \ boosting/prediction_early_stop.o \ + fmt/format.o \ + fmt/os.o \ io/bin.o \ io/config.o \ io/config_auto.o \ diff --git a/build-cran-package.sh b/build-cran-package.sh index d21f5f051d16..fbc8a14ad30b 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -28,6 +28,20 @@ cp -R R-package/* ${TEMP_R_DIR} cp -R include ${TEMP_R_DIR}/src/ cp -R src/* ${TEMP_R_DIR}/src/ +cp \ + external_libs/fast_double_parser/include/fast_double_parser.h \ + ${TEMP_R_DIR}/src/include/LightGBM + +mkdir -p ${TEMP_R_DIR}/src/external_libs/fmt +cp \ + external_libs/fmt/src/*.cc \ + ${TEMP_R_DIR}/src/external_libs/fmt/ + +mkdir -p ${TEMP_R_DIR}/src/include/LightGBM/fmt +cp \ + external_libs/fmt/include/fmt/*.h \ + ${TEMP_R_DIR}/src/include/LightGBM/fmt/ + cd ${TEMP_R_DIR} # Remove files not needed for CRAN @@ -67,6 +81,26 @@ cd ${TEMP_R_DIR} done find . -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' -exec rm {} \; + sed \ + -i.bak \ + -e 's/fmt\/format-inl\.h/LightGBM\/fmt\/format-inl\.h/' \ + src/external_libs/fmt/format.cc + + sed \ + -i.bak \ + -e 's/fmt\/os\.h/LightGBM\/fmt\/os\.h/' \ + src/external_libs/fmt/os.cc + + sed \ + -i.bak \ + -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \ + src/include/LightGBM/utils/common.h + + sed \ + -i.bak \ + -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \ + src/include/LightGBM/utils/common.h + # When building an R package with 'configure', it seems # you're guaranteed to get a shared library called # .so/dll. The package source code expects diff --git a/build_r.R b/build_r.R index b3a98c45cc6a..719e622a9d01 100644 --- a/build_r.R +++ b/build_r.R @@ -135,6 +135,17 @@ result <- file.remove( ) .handle_result(result) +#------------# +# submodules # +#------------# +result <- file.copy( + from = "external_libs/" + , to = sprintf("%s/", TEMP_SOURCE_DIR) + , recursive = TRUE + , overwrite = TRUE +) +.handle_result(result) + # copy files into the place CMake expects for (src_file in c("lightgbm_R.cpp", "lightgbm_R.h", "R_object_helper.h")) { result <- file.copy( From 859013ac2d989a0c80381d2d11b08ea7fafa06fb Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Tue, 6 Oct 2020 15:36:05 +0100 Subject: [PATCH 14/33] Cleanup CMakeLists --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31d41d437911..df45ad23c90f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -451,10 +451,6 @@ if(__BUILD_FOR_R) endif(__BUILD_FOR_R) # fmtlib/fmt -#if (NOT BUILD_STATIC_LIB) -# OPTION(BUILD_SHARED_LIBS "Build shared libs" ON) # fmt reads this flag. -#endif(NOT BUILD_STATIC_LIB) -#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops") add_subdirectory(external_libs/fmt) set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) TARGET_LINK_LIBRARIES(lightgbm fmt) From 01a07a41090c8f433e89334c5cdedab56053948f Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Tue, 6 Oct 2020 16:54:48 +0100 Subject: [PATCH 15/33] Try fmt hack to remove stdout --- .gitmodules | 2 +- external_libs/fmt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 8f1772cd19fa..7b5813e611fd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/boostorg/compute [submodule "external_libs/fmt"] path = external_libs/fmt - url = https://github.com/fmtlib/fmt.git + url = https://github.com/AlbertoEAF/fmt.git [submodule "external_libs/fast_double_parser"] path = external_libs/fast_double_parser url = https://github.com/lemire/fast_double_parser.git diff --git a/external_libs/fmt b/external_libs/fmt index 2e620ddbcd6c..8a350b09addc 160000 --- a/external_libs/fmt +++ b/external_libs/fmt @@ -1 +1 @@ -Subproject commit 2e620ddbcd6c18c13fbc48b3cf837817c87281f3 +Subproject commit 8a350b09addc42489c17852d6573ae5b72452ff3 From 9ed5a390d48eec38c5d2f8fda140aef40f1828bf Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 17 Oct 2020 10:49:09 +0100 Subject: [PATCH 16/33] Switch to header-only mode --- CMakeLists.txt | 6 +++--- external_libs/fmt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index df45ad23c90f..5ad717f03389 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -452,9 +452,9 @@ endif(__BUILD_FOR_R) # fmtlib/fmt add_subdirectory(external_libs/fmt) -set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) -TARGET_LINK_LIBRARIES(lightgbm fmt) -TARGET_LINK_LIBRARIES(_lightgbm fmt) +#set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) +TARGET_LINK_LIBRARIES(lightgbm PRIVATE fmt::fmt-header-only) +TARGET_LINK_LIBRARIES(_lightgbm PRIVATE fmt::fmt-header-only) install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin diff --git a/external_libs/fmt b/external_libs/fmt index 8a350b09addc..7277035736eb 160000 --- a/external_libs/fmt +++ b/external_libs/fmt @@ -1 +1 @@ -Subproject commit 8a350b09addc42489c17852d6573ae5b72452ff3 +Subproject commit 7277035736eb991207d8f95077e607b68f4b5ebe From 658b00bf629ab6b73cf33fe92c9c560037e62ae3 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sun, 18 Oct 2020 11:07:02 +0100 Subject: [PATCH 17/33] Add PRIVATE argument to target_link_libraries --- CMakeLists.txt | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ad717f03389..271cc85bdf47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -386,20 +386,20 @@ if(USE_SWIG) endif(USE_SWIG) if(USE_MPI) - TARGET_LINK_LIBRARIES(lightgbm ${MPI_CXX_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm ${MPI_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${MPI_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${MPI_CXX_LIBRARIES}) endif(USE_MPI) if(USE_OPENMP) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - TARGET_LINK_LIBRARIES(lightgbm OpenMP::OpenMP_CXX) - TARGET_LINK_LIBRARIES(_lightgbm OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE OpenMP::OpenMP_CXX) endif() endif(USE_OPENMP) if(USE_GPU) - TARGET_LINK_LIBRARIES(lightgbm ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) endif(USE_GPU) if(__INTEGRATE_OPENCL) @@ -419,34 +419,36 @@ if(USE_CUDA) set_target_properties(lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) TARGET_LINK_LIBRARIES( lightgbm + PRIVATE ${histograms} ) set_target_properties(_lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) TARGET_LINK_LIBRARIES( _lightgbm + PRIVATE ${histograms} ) endif(USE_CUDA) if(USE_HDFS) - TARGET_LINK_LIBRARIES(lightgbm ${HDFS_CXX_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm ${HDFS_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${HDFS_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${HDFS_CXX_LIBRARIES}) endif(USE_HDFS) if(WIN32) if(MINGW OR CYGWIN) - TARGET_LINK_LIBRARIES(lightgbm Ws2_32) - TARGET_LINK_LIBRARIES(_lightgbm Ws2_32) - TARGET_LINK_LIBRARIES(lightgbm IPHLPAPI) - TARGET_LINK_LIBRARIES(_lightgbm IPHLPAPI) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE Ws2_32) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE Ws2_32) + TARGET_LINK_LIBRARIES(lightgbm PRIVATE IPHLPAPI) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE IPHLPAPI) endif(MINGW OR CYGWIN) endif(WIN32) if(__BUILD_FOR_R) if(MSVC) - TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_MSVC_CORE_LIBRARY}) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${LIBR_MSVC_CORE_LIBRARY}) else() - TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_CORE_LIBRARY}) + TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${LIBR_CORE_LIBRARY}) endif(MSVC) endif(__BUILD_FOR_R) From 91dd2d811bec052bea18e53570c81aaa4971c849 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 31 Oct 2020 23:21:17 -0500 Subject: [PATCH 18/33] use fmt in header-only mode --- R-package/src/Makevars.in | 3 +-- R-package/src/Makevars.win.in | 3 +-- build-cran-package.sh | 15 --------------- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index bb1a2819ba87..da6815ac8b90 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -9,6 +9,7 @@ LGB_CPPFLAGS = \ PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ + -DFMT_HEADER_ONLY \ $(LGB_CPPFLAGS) PKG_CXXFLAGS = \ @@ -26,8 +27,6 @@ OBJECTS = \ boosting/gbdt_model_text.o \ boosting/gbdt_prediction.o \ boosting/prediction_early_stop.o \ - external_libs/fmt/format.o \ - external_libs/fmt/os.o \ io/bin.o \ io/config.o \ io/config_auto.o \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index 4aba8b0a28c6..de7fbedfed85 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -9,6 +9,7 @@ LGB_CPPFLAGS = \ PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ + -DFMT_HEADER_ONLY \ $(LGB_CPPFLAGS) PKG_CXXFLAGS = \ @@ -27,8 +28,6 @@ OBJECTS = \ boosting/gbdt_model_text.o \ boosting/gbdt_prediction.o \ boosting/prediction_early_stop.o \ - fmt/format.o \ - fmt/os.o \ io/bin.o \ io/config.o \ io/config_auto.o \ diff --git a/build-cran-package.sh b/build-cran-package.sh index fbc8a14ad30b..a18c9456864b 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -32,11 +32,6 @@ cp \ external_libs/fast_double_parser/include/fast_double_parser.h \ ${TEMP_R_DIR}/src/include/LightGBM -mkdir -p ${TEMP_R_DIR}/src/external_libs/fmt -cp \ - external_libs/fmt/src/*.cc \ - ${TEMP_R_DIR}/src/external_libs/fmt/ - mkdir -p ${TEMP_R_DIR}/src/include/LightGBM/fmt cp \ external_libs/fmt/include/fmt/*.h \ @@ -81,16 +76,6 @@ cd ${TEMP_R_DIR} done find . -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' -exec rm {} \; - sed \ - -i.bak \ - -e 's/fmt\/format-inl\.h/LightGBM\/fmt\/format-inl\.h/' \ - src/external_libs/fmt/format.cc - - sed \ - -i.bak \ - -e 's/fmt\/os\.h/LightGBM\/fmt\/os\.h/' \ - src/external_libs/fmt/os.cc - sed \ -i.bak \ -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \ From f90fa3b6bd064f46277d2f0bee7a893e752b0a32 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sun, 1 Nov 2020 12:05:21 +0000 Subject: [PATCH 19/33] Remove CMakeLists comment --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 271cc85bdf47..2f95a75378f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,7 +454,6 @@ endif(__BUILD_FOR_R) # fmtlib/fmt add_subdirectory(external_libs/fmt) -#set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) TARGET_LINK_LIBRARIES(lightgbm PRIVATE fmt::fmt-header-only) TARGET_LINK_LIBRARIES(_lightgbm PRIVATE fmt::fmt-header-only) From d062736e74b135183f9c9bb379c4d0e9d61d4ff1 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sun, 1 Nov 2020 12:15:06 +0000 Subject: [PATCH 20/33] Change OpenMP to PUBLIC linking in Mac --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f95a75378f2..33c401760b9d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -392,8 +392,8 @@ endif(USE_MPI) if(USE_OPENMP) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - TARGET_LINK_LIBRARIES(lightgbm PRIVATE OpenMP::OpenMP_CXX) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(lightgbm PUBLIC OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(_lightgbm PUBLIC OpenMP::OpenMP_CXX) endif() endif(USE_OPENMP) From e3235fc918e670ab6121be1a863f09a78d524f7a Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 7 Nov 2020 17:12:11 +0000 Subject: [PATCH 21/33] Update fmt submodule to 7.1.2 --- .gitmodules | 2 +- external_libs/fmt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 7b5813e611fd..8f1772cd19fa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/boostorg/compute [submodule "external_libs/fmt"] path = external_libs/fmt - url = https://github.com/AlbertoEAF/fmt.git + url = https://github.com/fmtlib/fmt.git [submodule "external_libs/fast_double_parser"] path = external_libs/fast_double_parser url = https://github.com/lemire/fast_double_parser.git diff --git a/external_libs/fmt b/external_libs/fmt index 7277035736eb..cc09f1a6798c 160000 --- a/external_libs/fmt +++ b/external_libs/fmt @@ -1 +1 @@ -Subproject commit 7277035736eb991207d8f95077e607b68f4b5ebe +Subproject commit cc09f1a6798c085c325569ef466bcdcffdc266d4 From e4997bd16887e5ccff390bb8a738608b154c84ff Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 7 Nov 2020 17:34:15 +0000 Subject: [PATCH 22/33] Use fmt in header-only-mode --- CMakeLists.txt | 6 +++--- include/LightGBM/utils/common.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33c401760b9d..e0c9934daba8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -453,9 +453,9 @@ if(__BUILD_FOR_R) endif(__BUILD_FOR_R) # fmtlib/fmt -add_subdirectory(external_libs/fmt) -TARGET_LINK_LIBRARIES(lightgbm PRIVATE fmt::fmt-header-only) -TARGET_LINK_LIBRARIES(_lightgbm PRIVATE fmt::fmt-header-only) +#add_subdirectory(external_libs/fmt) +#TARGET_LINK_LIBRARIES(lightgbm PRIVATE fmt::fmt-header-only) +#TARGET_LINK_LIBRARIES(_lightgbm PRIVATE fmt::fmt-header-only) install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index a959aabea8f4..4e6270486541 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -27,6 +27,7 @@ #include #include +#define FMT_HEADER_ONLY #include "../../../external_libs/fmt/include/fmt/format.h" #include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" From 5c169139034c7e441bd4ea732615bb941e2c64ae Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 7 Nov 2020 17:35:01 +0000 Subject: [PATCH 23/33] Remove fmt from CMakeLists.txt --- CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0c9934daba8..9629864dc0ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -452,11 +452,6 @@ if(__BUILD_FOR_R) endif(MSVC) endif(__BUILD_FOR_R) -# fmtlib/fmt -#add_subdirectory(external_libs/fmt) -#TARGET_LINK_LIBRARIES(lightgbm PRIVATE fmt::fmt-header-only) -#TARGET_LINK_LIBRARIES(_lightgbm PRIVATE fmt::fmt-header-only) - install(TARGETS lightgbm _lightgbm RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib From 03bd3b242a5fbff9c8a253dade0d7a46ab1fb71c Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 7 Nov 2020 16:41:49 +0000 Subject: [PATCH 24/33] Upgrade fast_double_parser to v0.2.0 --- external_libs/fast_double_parser | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external_libs/fast_double_parser b/external_libs/fast_double_parser index e09e5aad828b..94faf8de8aeb 160000 --- a/external_libs/fast_double_parser +++ b/external_libs/fast_double_parser @@ -1 +1 @@ -Subproject commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe +Subproject commit 94faf8de8aeb4a2930d8a5b95670c59a2ea8750b From 51843f78c4a56cbd1b300896a676bc1cab13ce22 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Tue, 10 Nov 2020 00:35:17 +0000 Subject: [PATCH 25/33] Revert "Add PRIVATE argument to target_link_libraries" This reverts commit 3dd45dde7b92531b2530ab54522bb843c56227a7. --- CMakeLists.txt | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9629864dc0ef..c8559c966ab1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -386,20 +386,20 @@ if(USE_SWIG) endif(USE_SWIG) if(USE_MPI) - TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${MPI_CXX_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${MPI_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm ${MPI_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm ${MPI_CXX_LIBRARIES}) endif(USE_MPI) if(USE_OPENMP) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - TARGET_LINK_LIBRARIES(lightgbm PUBLIC OpenMP::OpenMP_CXX) - TARGET_LINK_LIBRARIES(_lightgbm PUBLIC OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(lightgbm OpenMP::OpenMP_CXX) + TARGET_LINK_LIBRARIES(_lightgbm OpenMP::OpenMP_CXX) endif() endif(USE_OPENMP) if(USE_GPU) - TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm ${OpenCL_LIBRARY} ${Boost_LIBRARIES}) endif(USE_GPU) if(__INTEGRATE_OPENCL) @@ -419,36 +419,34 @@ if(USE_CUDA) set_target_properties(lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) TARGET_LINK_LIBRARIES( lightgbm - PRIVATE ${histograms} ) set_target_properties(_lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) TARGET_LINK_LIBRARIES( _lightgbm - PRIVATE ${histograms} ) endif(USE_CUDA) if(USE_HDFS) - TARGET_LINK_LIBRARIES(lightgbm PRIVATE ${HDFS_CXX_LIBRARIES}) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${HDFS_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(lightgbm ${HDFS_CXX_LIBRARIES}) + TARGET_LINK_LIBRARIES(_lightgbm ${HDFS_CXX_LIBRARIES}) endif(USE_HDFS) if(WIN32) if(MINGW OR CYGWIN) - TARGET_LINK_LIBRARIES(lightgbm PRIVATE Ws2_32) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE Ws2_32) - TARGET_LINK_LIBRARIES(lightgbm PRIVATE IPHLPAPI) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE IPHLPAPI) + TARGET_LINK_LIBRARIES(lightgbm Ws2_32) + TARGET_LINK_LIBRARIES(_lightgbm Ws2_32) + TARGET_LINK_LIBRARIES(lightgbm IPHLPAPI) + TARGET_LINK_LIBRARIES(_lightgbm IPHLPAPI) endif(MINGW OR CYGWIN) endif(WIN32) if(__BUILD_FOR_R) if(MSVC) - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${LIBR_MSVC_CORE_LIBRARY}) + TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_MSVC_CORE_LIBRARY}) else() - TARGET_LINK_LIBRARIES(_lightgbm PRIVATE ${LIBR_CORE_LIBRARY}) + TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_CORE_LIBRARY}) endif(MSVC) endif(__BUILD_FOR_R) From 90f8bd8683e7c701c42b61f8a4009db473f2f4f8 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 14 Nov 2020 12:18:02 +0000 Subject: [PATCH 26/33] Address James Lamb's comments --- R-package/DESCRIPTION | 3 +++ R-package/README.md | 2 ++ R-package/src/Makevars.in | 1 - R-package/src/Makevars.win.in | 1 - 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6e969b087c30..6e61f459c99b 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -20,6 +20,9 @@ Authors@R: c( person("Jay", "Loden", role = c("cph")), person("Dave", "Daeschler", role = c("cph")), person("Giampaolo", "Rodola", role = c("cph")), + person("Alberto", "Ferreira", role = c("ctb")), + person("Daniel", "Lemire", role = c("ctb")), + person("Victor", "Zverovich", role = c("cph")), person("IBM Corporation", role = c("ctb")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. diff --git a/R-package/README.md b/R-package/README.md index f237f14686f6..6231e9edd9a8 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -251,6 +251,8 @@ For more information on this approach, see ["Writing R Extensions"](https://cran From the root of the repository, run the following. ```shell +git submodule init +git submodule update sh build-cran-package.sh ``` diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index da6815ac8b90..0c580ebe36bf 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -9,7 +9,6 @@ LGB_CPPFLAGS = \ PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ - -DFMT_HEADER_ONLY \ $(LGB_CPPFLAGS) PKG_CXXFLAGS = \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index de7fbedfed85..952508e16a7d 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -9,7 +9,6 @@ LGB_CPPFLAGS = \ PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ - -DFMT_HEADER_ONLY \ $(LGB_CPPFLAGS) PKG_CXXFLAGS = \ From eda47591bcc2cf7e595007f2675c53058bf89217 Mon Sep 17 00:00:00 2001 From: Alberto Ferreira Date: Sat, 14 Nov 2020 12:10:44 +0000 Subject: [PATCH 27/33] Update R-package/.Rbuildignore Co-authored-by: James Lamb --- R-package/.Rbuildignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index c02d584991bb..2aeecfe18cc8 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -32,6 +32,8 @@ src/external_libs/fast_double_parser/benchmarks src/external_libs/fast_double_parser/Makefile src/external_libs/fast_double_parser/.*\.md src/external_libs/fast_double_parser/tests +src/external_libs/fast_double_parser/.*\.yaml +src/external_libs/fast_double_parser/.*\.yml src/external_libs/fmt/.*\.md src/external_libs/fmt/doc src/external_libs/fmt/support/Android\.mk From 082c1d45d9a8a458fe048897204d11a7a23fc91c Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 14 Nov 2020 12:20:28 +0000 Subject: [PATCH 28/33] Upgrade to fast_double_parser v0.3.0 - Solaris support --- external_libs/fast_double_parser | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external_libs/fast_double_parser b/external_libs/fast_double_parser index 94faf8de8aeb..ace60646c02d 160000 --- a/external_libs/fast_double_parser +++ b/external_libs/fast_double_parser @@ -1 +1 @@ -Subproject commit 94faf8de8aeb4a2930d8a5b95670c59a2ea8750b +Subproject commit ace60646c02dc54c57f19d644e49a61e7e7758ec From cc19dafcfb533438ba3793627778d6c5d12f8e8b Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Mon, 16 Nov 2020 19:54:35 +0000 Subject: [PATCH 29/33] Use legacy code only in Solaris --- include/LightGBM/utils/common.h | 83 +++++---- .../LightGBM/utils/common_legacy_solaris.h | 174 ++++++++++++++++++ src/io/tree.cpp | 34 ++-- 3 files changed, 240 insertions(+), 51 deletions(-) create mode 100644 include/LightGBM/utils/common_legacy_solaris.h diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 4e6270486541..f0a7ff49f46e 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -5,6 +5,9 @@ #ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ #define LIGHTGBM_UTILS_COMMON_FUN_H_ +#if (defined(sun) || defined(__sun)) +#include +#endif #include #include @@ -27,8 +30,10 @@ #include #include +#if (! (defined(sun) || defined(__sun))) #define FMT_HEADER_ONLY #include "../../../external_libs/fmt/include/fmt/format.h" +#endif #include "../../../external_libs/fast_double_parser/include/fast_double_parser.h" #ifdef _MSC_VER @@ -1089,43 +1094,6 @@ struct __StringToTHelper { } }; -/*! -* Safely formats a value onto a buffer according to a format string and null-terminates it. -* -* \note It checks that the full value was written or forcefully aborts. -* This safety check serves to prevent incorrect internal API usage. -* Correct usage will never incur in this problem: -* - The received buffer size shall be sufficient at all times for the input format string and value. -*/ -template -inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { - auto result = fmt::format_to_n(buffer, buf_len, format, value); - if (result.size >= buf_len) { - Log::Fatal("Numerical conversion failed. Buffer is too small."); - } - buffer[result.size] = '\0'; -} - -template -struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{}", value); - } -}; - -template -struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:g}", value); - } -}; - -template -struct __TToStringHelper { - void operator()(T value, char* buffer, size_t buf_len) const { - format_to_buf(buffer, buf_len, "{:.17g}", value); - } -}; /*! * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, @@ -1186,6 +1154,45 @@ inline static std::vector StringToArray(const std::string& str, char delimite return ret; } +#if (!(defined(sun) || defined(__sun))) +/*! +* Safely formats a value onto a buffer according to a format string and null-terminates it. +* +* \note It checks that the full value was written or forcefully aborts. +* This safety check serves to prevent incorrect internal API usage. +* Correct usage will never incur in this problem: +* - The received buffer size shall be sufficient at all times for the input format string and value. +*/ +template +inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { + auto result = fmt::format_to_n(buffer, buf_len, format, value); + if (result.size >= buf_len) { + Log::Fatal("Numerical conversion failed. Buffer is too small."); + } + buffer[result.size] = '\0'; +} + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{}", value); + } +}; + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:g}", value); + } +}; + +template +struct __TToStringHelper { + void operator()(T value, char* buffer, size_t buf_len) const { + format_to_buf(buffer, buf_len, "{:.17g}", value); + } +}; + /*! * Converts an array to a string with with values separated by the space character. * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality @@ -1212,6 +1219,8 @@ inline static std::string ArrayToString(const std::vector& arr, size_t n) { } return str_buf.str(); } +#endif // (! (defined(sun) || defined(__sun))) + } // namespace CommonC diff --git a/include/LightGBM/utils/common_legacy_solaris.h b/include/LightGBM/utils/common_legacy_solaris.h new file mode 100644 index 000000000000..f41da791cb11 --- /dev/null +++ b/include/LightGBM/utils/common_legacy_solaris.h @@ -0,0 +1,174 @@ +/*! + * Copyright (c) 2016 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. + */ +/*! + * This file is meant to be used ONLY IN SOLARIS! + * The newer code that replaced it is faster and safe regarding locale! + */ +#ifndef LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ +#define LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ + +#include + +#include +#include +#include + +namespace LightGBM { + +namespace CommonLegacy { + +inline static unsigned CountDecimalDigit32(uint32_t n) { +#if defined(__GNUC__) + static const uint32_t powers_of_10[] = { + 0, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000 + }; + uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; + return t - (n < powers_of_10[t]) + 1; +#else + if (n < 10) return 1; + else if (n < 100) return 2; + else if (n < 1000) return 3; + else if (n < 10000) return 4; + else if (n < 100000) return 5; + else if (n < 1000000) return 6; + else if (n < 10000000) return 7; + else if (n < 100000000) return 8; + else if (n < 1000000000) return 9; + else return 10; +#endif +} + +inline static void Uint32ToStr(uint32_t value, char* buffer) { + const char kDigitsLut[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' + }; + unsigned digit = CountDecimalDigit32(value); + buffer += digit; + *buffer = '\0'; + + while (value >= 100) { + const unsigned i = (value % 100) << 1; + value /= 100; + *--buffer = kDigitsLut[i + 1]; + *--buffer = kDigitsLut[i]; + } + + if (value < 10) { + *--buffer = static_cast(value) + '0'; + } else { + const unsigned i = value << 1; + *--buffer = kDigitsLut[i + 1]; + *--buffer = kDigitsLut[i]; + } +} + +inline static void Int32ToStr(int32_t value, char* buffer) { + uint32_t u = static_cast(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + Uint32ToStr(u, buffer); +} + +inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) { + int num_chars = snprintf(buffer, buffer_len, "%.17g", value); + CHECK_GE(num_chars, 0); +} + + +template +struct __TToStringHelperFast { + void operator()(T value, char* buffer, size_t) const { + Int32ToStr(value, buffer); + } +}; + +template +struct __TToStringHelperFast { + void operator()(T value, char* buffer, size_t buf_len) const { + int num_chars = snprintf(buffer, buf_len, "%g", value); + CHECK_GE(num_chars, 0); + } +}; + +template +struct __TToStringHelperFast { + void operator()(T value, char* buffer, size_t) const { + Uint32ToStr(value, buffer); + } +}; + +template +inline static std::string _ArrayToStringFast(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + __TToStringHelperFast::value, std::is_unsigned::value> helper; + const size_t buf_len = 16; + std::vector buffer(buf_len); + std::stringstream str_buf; + helper(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + helper(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); +} + +inline static std::string _ArrayToString(const std::vector& arr, size_t n) { + if (arr.empty() || n == 0) { + return std::string(""); + } + const size_t buf_len = 32; + std::vector buffer(buf_len); + std::stringstream str_buf; + DoubleToStr(arr[0], buffer.data(), buf_len); + str_buf << buffer.data(); + for (size_t i = 1; i < std::min(n, arr.size()); ++i) { + DoubleToStr(arr[i], buffer.data(), buf_len); + str_buf << ' ' << buffer.data(); + } + return str_buf.str(); +} + + +template +inline static typename std::enable_if::type +ArrayToString(const std::vector& arr, size_t n) { + return _ArrayToStringFast(arr, n); +} + +template +inline static typename std::enable_if< +(high_precision_output==true) && (std::is_same::value), std::string>::type +ArrayToString(const std::vector& arr, size_t n) { + return _ArrayToString(arr, n); +} + +} // namespace CommonLegacy + +} // namespace LightGBM + +#endif // LightGBM_UTILS_COMMON_FUN_H_ diff --git a/src/io/tree.cpp b/src/io/tree.cpp index ccc997d2f5c4..a7e90b607619 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -221,37 +221,43 @@ std::string Tree::ToString() const { std::stringstream str_buf; Common::C_stringstream(str_buf); + #if (defined(sun) || defined(__sun)) + using CommonLegacy::ArrayToString; // Slower & unsafe regarding locale. + #else + using CommonC::ArrayToString; + #endif + str_buf << "num_leaves=" << num_leaves_ << '\n'; str_buf << "num_cat=" << num_cat_ << '\n'; str_buf << "split_feature=" - << CommonC::ArrayToString(split_feature_, num_leaves_ - 1) << '\n'; + << ArrayToString(split_feature_, num_leaves_ - 1) << '\n'; str_buf << "split_gain=" - << CommonC::ArrayToString(split_gain_, num_leaves_ - 1) << '\n'; + << ArrayToString(split_gain_, num_leaves_ - 1) << '\n'; str_buf << "threshold=" - << CommonC::ArrayToString(threshold_, num_leaves_ - 1) << '\n'; + << ArrayToString(threshold_, num_leaves_ - 1) << '\n'; str_buf << "decision_type=" - << CommonC::ArrayToString(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; + << ArrayToString(Common::ArrayCast(decision_type_), num_leaves_ - 1) << '\n'; str_buf << "left_child=" - << CommonC::ArrayToString(left_child_, num_leaves_ - 1) << '\n'; + << ArrayToString(left_child_, num_leaves_ - 1) << '\n'; str_buf << "right_child=" - << CommonC::ArrayToString(right_child_, num_leaves_ - 1) << '\n'; + << ArrayToString(right_child_, num_leaves_ - 1) << '\n'; str_buf << "leaf_value=" - << CommonC::ArrayToString(leaf_value_, num_leaves_) << '\n'; + << ArrayToString(leaf_value_, num_leaves_) << '\n'; str_buf << "leaf_weight=" - << CommonC::ArrayToString(leaf_weight_, num_leaves_) << '\n'; + << ArrayToString(leaf_weight_, num_leaves_) << '\n'; str_buf << "leaf_count=" - << CommonC::ArrayToString(leaf_count_, num_leaves_) << '\n'; + << ArrayToString(leaf_count_, num_leaves_) << '\n'; str_buf << "internal_value=" - << CommonC::ArrayToString(internal_value_, num_leaves_ - 1) << '\n'; + << ArrayToString(internal_value_, num_leaves_ - 1) << '\n'; str_buf << "internal_weight=" - << CommonC::ArrayToString(internal_weight_, num_leaves_ - 1) << '\n'; + << ArrayToString(internal_weight_, num_leaves_ - 1) << '\n'; str_buf << "internal_count=" - << CommonC::ArrayToString(internal_count_, num_leaves_ - 1) << '\n'; + << ArrayToString(internal_count_, num_leaves_ - 1) << '\n'; if (num_cat_ > 0) { str_buf << "cat_boundaries=" - << CommonC::ArrayToString(cat_boundaries_, num_cat_ + 1) << '\n'; + << ArrayToString(cat_boundaries_, num_cat_ + 1) << '\n'; str_buf << "cat_threshold=" - << CommonC::ArrayToString(cat_threshold_, cat_threshold_.size()) << '\n'; + << ArrayToString(cat_threshold_, cat_threshold_.size()) << '\n'; } str_buf << "shrinkage=" << shrinkage_ << '\n'; str_buf << '\n'; From 69d048889deb74bc7ca894ef7a4cc3db5257d5e3 Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 21 Nov 2020 23:24:06 +0000 Subject: [PATCH 30/33] Fix lint issues --- include/LightGBM/utils/common.h | 4 +-- .../LightGBM/utils/common_legacy_solaris.h | 26 +++++-------------- src/io/tree.cpp | 2 +- 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index f0a7ff49f46e..2d5ce27d2912 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -30,7 +30,7 @@ #include #include -#if (! (defined(sun) || defined(__sun))) +#if (!(defined(sun) || defined(__sun))) #define FMT_HEADER_ONLY #include "../../../external_libs/fmt/include/fmt/format.h" #endif @@ -1219,7 +1219,7 @@ inline static std::string ArrayToString(const std::vector& arr, size_t n) { } return str_buf.str(); } -#endif // (! (defined(sun) || defined(__sun))) +#endif // (! (defined(sun) || defined(__sun))) } // namespace CommonC diff --git a/include/LightGBM/utils/common_legacy_solaris.h b/include/LightGBM/utils/common_legacy_solaris.h index f41da791cb11..13d02e3ed7f6 100644 --- a/include/LightGBM/utils/common_legacy_solaris.h +++ b/include/LightGBM/utils/common_legacy_solaris.h @@ -11,31 +11,17 @@ #include +#include #include #include #include +#include namespace LightGBM { namespace CommonLegacy { inline static unsigned CountDecimalDigit32(uint32_t n) { -#if defined(__GNUC__) - static const uint32_t powers_of_10[] = { - 0, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000 - }; - uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; - return t - (n < powers_of_10[t]) + 1; -#else if (n < 10) return 1; else if (n < 100) return 2; else if (n < 1000) return 3; @@ -45,8 +31,8 @@ inline static unsigned CountDecimalDigit32(uint32_t n) { else if (n < 10000000) return 7; else if (n < 100000000) return 8; else if (n < 1000000000) return 9; - else return 10; -#endif + else + return 10; } inline static void Uint32ToStr(uint32_t value, char* buffer) { @@ -155,14 +141,14 @@ inline static std::string _ArrayToString(const std::vector& arr, size_t template -inline static typename std::enable_if::type +inline static typename std::enable_if::type ArrayToString(const std::vector& arr, size_t n) { return _ArrayToStringFast(arr, n); } template inline static typename std::enable_if< -(high_precision_output==true) && (std::is_same::value), std::string>::type +(high_precision_output == true) && (std::is_same::value), std::string>::type ArrayToString(const std::vector& arr, size_t n) { return _ArrayToString(arr, n); } diff --git a/src/io/tree.cpp b/src/io/tree.cpp index a7e90b607619..f94edc49ce87 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -222,7 +222,7 @@ std::string Tree::ToString() const { Common::C_stringstream(str_buf); #if (defined(sun) || defined(__sun)) - using CommonLegacy::ArrayToString; // Slower & unsafe regarding locale. + using CommonLegacy::ArrayToString; // Slower & unsafe regarding locale. #else using CommonC::ArrayToString; #endif From 1411133826a6ed30ce63aee2e839981c4bf2ce8e Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Sat, 21 Nov 2020 23:47:24 +0000 Subject: [PATCH 31/33] Fix comment --- include/LightGBM/utils/common_legacy_solaris.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/LightGBM/utils/common_legacy_solaris.h b/include/LightGBM/utils/common_legacy_solaris.h index 13d02e3ed7f6..f244ff8bc9ab 100644 --- a/include/LightGBM/utils/common_legacy_solaris.h +++ b/include/LightGBM/utils/common_legacy_solaris.h @@ -157,4 +157,4 @@ ArrayToString(const std::vector& arr, size_t n) { } // namespace LightGBM -#endif // LightGBM_UTILS_COMMON_FUN_H_ +#endif // LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ From ba28c0fc27e93dec9513494396fedc8d238a8d4b Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Mon, 23 Nov 2020 17:56:40 +0000 Subject: [PATCH 32/33] Address StrikerRUS's comments (solaris ifdef). --- R-package/README.md | 3 +-- include/LightGBM/utils/common.h | 8 ++++---- include/LightGBM/utils/common_legacy_solaris.h | 6 +++--- src/io/tree.cpp | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index 6231e9edd9a8..fc787faed117 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -251,8 +251,7 @@ For more information on this approach, see ["Writing R Extensions"](https://cran From the root of the repository, run the following. ```shell -git submodule init -git submodule update +git submodule update --init --recursive sh build-cran-package.sh ``` diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 2d5ce27d2912..3f3762e293ac 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -5,7 +5,7 @@ #ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ #define LIGHTGBM_UTILS_COMMON_FUN_H_ -#if (defined(sun) || defined(__sun)) +#if ((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))) #include #endif #include @@ -30,7 +30,7 @@ #include #include -#if (!(defined(sun) || defined(__sun))) +#if (!((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)))) #define FMT_HEADER_ONLY #include "../../../external_libs/fmt/include/fmt/format.h" #endif @@ -1154,7 +1154,7 @@ inline static std::vector StringToArray(const std::string& str, char delimite return ret; } -#if (!(defined(sun) || defined(__sun))) +#if (!((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)))) /*! * Safely formats a value onto a buffer according to a format string and null-terminates it. * @@ -1219,7 +1219,7 @@ inline static std::string ArrayToString(const std::vector& arr, size_t n) { } return str_buf.str(); } -#endif // (! (defined(sun) || defined(__sun))) +#endif // (!((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)))) } // namespace CommonC diff --git a/include/LightGBM/utils/common_legacy_solaris.h b/include/LightGBM/utils/common_legacy_solaris.h index f244ff8bc9ab..9c1dacc09976 100644 --- a/include/LightGBM/utils/common_legacy_solaris.h +++ b/include/LightGBM/utils/common_legacy_solaris.h @@ -6,8 +6,8 @@ * This file is meant to be used ONLY IN SOLARIS! * The newer code that replaced it is faster and safe regarding locale! */ -#ifndef LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ -#define LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ +#ifndef LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ +#define LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ #include @@ -157,4 +157,4 @@ ArrayToString(const std::vector& arr, size_t n) { } // namespace LightGBM -#endif // LIGHTGBM_UTILS_COMMON_FUN_LEGACY_H_ +#endif // LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ diff --git a/src/io/tree.cpp b/src/io/tree.cpp index f94edc49ce87..a4e42831b0e7 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -221,7 +221,7 @@ std::string Tree::ToString() const { std::stringstream str_buf; Common::C_stringstream(str_buf); - #if (defined(sun) || defined(__sun)) + #if ((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))) using CommonLegacy::ArrayToString; // Slower & unsafe regarding locale. #else using CommonC::ArrayToString; From 7e1772980c349021a09dc438b36401a17bf5e2ba Mon Sep 17 00:00:00 2001 From: AlbertoEAF Date: Tue, 24 Nov 2020 14:06:56 +0000 Subject: [PATCH 33/33] Change header guards --- include/LightGBM/utils/common.h | 6 +++--- include/LightGBM/utils/common_legacy_solaris.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 3f3762e293ac..dd71782fb1ae 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -2,8 +2,8 @@ * Copyright (c) 2016 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. */ -#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_ -#define LIGHTGBM_UTILS_COMMON_FUN_H_ +#ifndef LIGHTGBM_UTILS_COMMON_H_ +#define LIGHTGBM_UTILS_COMMON_H_ #if ((defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))) #include @@ -1227,4 +1227,4 @@ inline static std::string ArrayToString(const std::vector& arr, size_t n) { } // namespace LightGBM -#endif // LightGBM_UTILS_COMMON_FUN_H_ +#endif // LIGHTGBM_UTILS_COMMON_H_ diff --git a/include/LightGBM/utils/common_legacy_solaris.h b/include/LightGBM/utils/common_legacy_solaris.h index 9c1dacc09976..97f977108fc6 100644 --- a/include/LightGBM/utils/common_legacy_solaris.h +++ b/include/LightGBM/utils/common_legacy_solaris.h @@ -6,8 +6,8 @@ * This file is meant to be used ONLY IN SOLARIS! * The newer code that replaced it is faster and safe regarding locale! */ -#ifndef LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ -#define LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ +#ifndef LIGHTGBM_UTILS_COMMON_LEGACY_SOLARIS_H_ +#define LIGHTGBM_UTILS_COMMON_LEGACY_SOLARIS_H_ #include @@ -157,4 +157,4 @@ ArrayToString(const std::vector& arr, size_t n) { } // namespace LightGBM -#endif // LIGHTGBM_UTILS_COMMON_FUN_LEGACY_SOLARIS_H_ +#endif // LIGHTGBM_UTILS_COMMON_LEGACY_SOLARIS_H_