From 2313c01e00a333f3c7a02cc4817bc291b4e349f9 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 26 Feb 2023 03:41:48 +0800 Subject: [PATCH 1/3] Implement parallel logical algorithms. --- src/common/algorithm.h | 60 ++++++++++++++++++++++++++---- tests/cpp/common/test_algorithm.cc | 49 ++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 937b6b63844e..3900628c344b 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -3,17 +3,19 @@ */ #ifndef XGBOOST_COMMON_ALGORITHM_H_ #define XGBOOST_COMMON_ALGORITHM_H_ -#include // upper_bound, stable_sort, sort, max -#include // size_t -#include // less -#include // iterator_traits, distance -#include // vector +#include // for upper_bound, stable_sort, sort, max, all_of, none_of, min +#include // for size_t +#include // for less +#include // for iterator_traits, distance +#include // for vector -#include "numeric.h" // Iota -#include "xgboost/context.h" // Context +#include "common.h" // for DivRoundUp +#include "numeric.h" // for Iota +#include "threading_utils.h" // for MemStackAllocator, DefaultMaxThreads, ParallelFor +#include "xgboost/context.h" // for Context // clang with libstdc++ works as well -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && !defined(__APPLE__) +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && !defined(__APPLE__) && defined(_OPENMP) #define GCC_HAS_PARALLEL 1 #endif // GLIC_VERSION @@ -71,6 +73,7 @@ void Sort(Context const *ctx, Iter begin, Iter end, Comp comp) { } } + template ::value_type, typename Comp = std::less> std::vector ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less{}) { @@ -82,6 +85,47 @@ std::vector ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = s StableSort(ctx, result.begin(), result.end(), op); return result; } + +namespace detail { +template +bool Logical(Context const *ctx, It first, It last, Op &&op) { + common::MemStackAllocator tloc{ + static_cast(ctx->Threads())}; + auto n = std::distance(first, last); + auto n_per_thread = common::DivRoundUp(n, ctx->Threads()); + common::ParallelFor(ctx->Threads(), ctx->Threads(), [&](auto t) { + auto begin = t * n_per_thread; + auto end = std::min(begin + n_per_thread, n); + + auto first_tloc = first + begin; + auto last_tloc = first + end; + + bool result = op(first_tloc, last_tloc); + tloc[t] = result; + }); + return std::all_of(tloc.cbegin(), tloc.cend(), [](auto v) { return v; }); +} +} // namespace detail + +/** + * \brief Parallel version of std::none_of + */ +template +bool NoneOf(Context const *ctx, It first, It last, Pred predicate) { + return detail::Logical(ctx, first, last, [&predicate](auto first, auto last) { + return std::none_of(first, last, predicate); + }); +} + +/** + * \brief Parallel version of std::all_of + */ +template +bool AllOf(Context const *ctx, It first, It last, Pred predicate) { + return detail::Logical(ctx, first, last, [&predicate](auto first, auto last) { + return std::all_of(first, last, predicate); + }); +} } // namespace common } // namespace xgboost diff --git a/tests/cpp/common/test_algorithm.cc b/tests/cpp/common/test_algorithm.cc index 630460714e37..eda9e38c57bd 100644 --- a/tests/cpp/common/test_algorithm.cc +++ b/tests/cpp/common/test_algorithm.cc @@ -2,10 +2,11 @@ * Copyright 2020-2023 by XGBoost Contributors */ #include -#include // Context -#include +#include // for Context -#include // is_sorted +#include // for is_sorted +#include // for int32_t +#include // for vector #include "../../../src/common/algorithm.h" @@ -31,5 +32,47 @@ TEST(Algorithm, Sort) { StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{}); ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend())); } + +TEST(Algorithm, AllOf) { + Context ctx; + auto is_zero = [](auto v) { return v == 0; }; + + for (std::size_t n : {3, 16}) { + std::vector data(n, 0); + for (std::int32_t n_threads : {1, 3, 7}) { + ctx.nthread = n_threads; + auto ret = AllOf(&ctx, data.cbegin(), data.cend(), is_zero); + ASSERT_TRUE(ret); + } + + data[n / 2] = 1; + for (std::int32_t n_threads : {1, 3, 7}) { + ctx.nthread = n_threads; + auto ret = AllOf(&ctx, data.cbegin(), data.cend(), is_zero); + ASSERT_FALSE(ret); + } + } +} + +TEST(Algorithm, NoneOf) { + Context ctx; + auto is_one = [](auto v) { return v == 1; }; + + for (std::size_t n : {3, 16}) { + std::vector data(n, 0); + for (std::int32_t n_threads : {1, 3, 7}) { + ctx.nthread = n_threads; + auto ret = NoneOf(&ctx, data.cbegin(), data.cend(), is_one); + ASSERT_TRUE(ret); + } + + data[n / 2] = 1; + for (std::int32_t n_threads : {1, 3, 7}) { + ctx.nthread = n_threads; + auto ret = NoneOf(&ctx, data.cbegin(), data.cend(), is_one); + ASSERT_FALSE(ret); + } + } +} } // namespace common } // namespace xgboost From 76fed13d9f76df3029ac18a1a4af0a73f7f216a1 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Sun, 26 Feb 2023 03:43:43 +0800 Subject: [PATCH 2/3] lint. --- src/common/algorithm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 3900628c344b..fd347a4f3549 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -3,11 +3,11 @@ */ #ifndef XGBOOST_COMMON_ALGORITHM_H_ #define XGBOOST_COMMON_ALGORITHM_H_ -#include // for upper_bound, stable_sort, sort, max, all_of, none_of, min -#include // for size_t -#include // for less -#include // for iterator_traits, distance -#include // for vector +#include // for upper_bound, stable_sort, sort, max, all_of, none_of, min +#include // for size_t +#include // for less +#include // for iterator_traits, distance +#include // for vector #include "common.h" // for DivRoundUp #include "numeric.h" // for Iota @@ -15,7 +15,8 @@ #include "xgboost/context.h" // for Context // clang with libstdc++ works as well -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && !defined(__APPLE__) && defined(_OPENMP) +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \ + !defined(__APPLE__) && defined(_OPENMP) #define GCC_HAS_PARALLEL 1 #endif // GLIC_VERSION @@ -73,7 +74,6 @@ void Sort(Context const *ctx, Iter begin, Iter end, Comp comp) { } } - template ::value_type, typename Comp = std::less> std::vector ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less{}) { From 631405f45e255ea568a9aba388f9b248750d5aef Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 27 Feb 2023 21:49:58 +0800 Subject: [PATCH 3/3] fixes. --- src/common/algorithm.h | 35 +++++++++++++++++++----------- tests/cpp/common/test_algorithm.cc | 22 ++++++++++++++----- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/common/algorithm.h b/src/common/algorithm.h index fd347a4f3549..fab92fd30e83 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -3,11 +3,12 @@ */ #ifndef XGBOOST_COMMON_ALGORITHM_H_ #define XGBOOST_COMMON_ALGORITHM_H_ -#include // for upper_bound, stable_sort, sort, max, all_of, none_of, min -#include // for size_t -#include // for less -#include // for iterator_traits, distance -#include // for vector +#include // for upper_bound, stable_sort, sort, max, all_of, none_of, min +#include // for size_t +#include // for less +#include // for iterator_traits, distance +#include // for is_same +#include // for vector #include "common.h" // for DivRoundUp #include "numeric.h" // for Iota @@ -88,18 +89,26 @@ std::vector ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = s namespace detail { template -bool Logical(Context const *ctx, It first, It last, Op &&op) { - common::MemStackAllocator tloc{ - static_cast(ctx->Threads())}; +bool Logical(Context const *ctx, It first, It last, Op op) { auto n = std::distance(first, last); - auto n_per_thread = common::DivRoundUp(n, ctx->Threads()); - common::ParallelFor(ctx->Threads(), ctx->Threads(), [&](auto t) { + auto n_threads = + std::max(std::min(n, static_cast(ctx->Threads())), static_cast(1)); + common::MemStackAllocator tloc{ + static_cast(n_threads), false}; + CHECK_GE(n, 0); + CHECK_GE(ctx->Threads(), 1); + static_assert(std::is_same::value, ""); + auto const n_per_thread = common::DivRoundUp(n, ctx->Threads()); + common::ParallelFor(static_cast(n_threads), n_threads, [&](auto t) { auto begin = t * n_per_thread; auto end = std::min(begin + n_per_thread, n); auto first_tloc = first + begin; auto last_tloc = first + end; - + if (first_tloc >= last_tloc) { + tloc[t] = true; + return; + } bool result = op(first_tloc, last_tloc); tloc[t] = result; }); @@ -112,7 +121,7 @@ bool Logical(Context const *ctx, It first, It last, Op &&op) { */ template bool NoneOf(Context const *ctx, It first, It last, Pred predicate) { - return detail::Logical(ctx, first, last, [&predicate](auto first, auto last) { + return detail::Logical(ctx, first, last, [&predicate](It first, It last) { return std::none_of(first, last, predicate); }); } @@ -122,7 +131,7 @@ bool NoneOf(Context const *ctx, It first, It last, Pred predicate) { */ template bool AllOf(Context const *ctx, It first, It last, Pred predicate) { - return detail::Logical(ctx, first, last, [&predicate](auto first, auto last) { + return detail::Logical(ctx, first, last, [&predicate](It first, It last) { return std::all_of(first, last, predicate); }); } diff --git a/tests/cpp/common/test_algorithm.cc b/tests/cpp/common/test_algorithm.cc index eda9e38c57bd..5b861bc75b97 100644 --- a/tests/cpp/common/test_algorithm.cc +++ b/tests/cpp/common/test_algorithm.cc @@ -37,16 +37,22 @@ TEST(Algorithm, AllOf) { Context ctx; auto is_zero = [](auto v) { return v == 0; }; - for (std::size_t n : {3, 16}) { + for (std::size_t n : {0, 3, 16, 128}) { std::vector data(n, 0); - for (std::int32_t n_threads : {1, 3, 7}) { + for (std::int32_t n_threads : {0, 1, 3, 7}) { ctx.nthread = n_threads; auto ret = AllOf(&ctx, data.cbegin(), data.cend(), is_zero); ASSERT_TRUE(ret); + // same result as std for empty case. + ASSERT_TRUE(std::all_of(data.cbegin(), data.cend(), is_zero)); + } + + if (n == 0) { + continue; } data[n / 2] = 1; - for (std::int32_t n_threads : {1, 3, 7}) { + for (std::int32_t n_threads : {0, 1, 3, 7}) { ctx.nthread = n_threads; auto ret = AllOf(&ctx, data.cbegin(), data.cend(), is_zero); ASSERT_FALSE(ret); @@ -58,12 +64,18 @@ TEST(Algorithm, NoneOf) { Context ctx; auto is_one = [](auto v) { return v == 1; }; - for (std::size_t n : {3, 16}) { + for (std::size_t n : {0, 3, 16, 128}) { std::vector data(n, 0); - for (std::int32_t n_threads : {1, 3, 7}) { + for (std::int32_t n_threads : {0, 1, 3, 7}) { ctx.nthread = n_threads; auto ret = NoneOf(&ctx, data.cbegin(), data.cend(), is_one); ASSERT_TRUE(ret); + // same result as std for empty case. + ASSERT_TRUE(std::none_of(data.cbegin(), data.cend(), is_one)); + } + + if (n == 0) { + continue; } data[n / 2] = 1;