-
Notifications
You must be signed in to change notification settings - Fork 13
/
hpc_transform_reduce.hpp
92 lines (76 loc) · 2.71 KB
/
hpc_transform_reduce.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#pragma once
#include <hpc_execution.hpp>
#ifdef HPC_CUDA
#include <thrust/execution_policy.h>
#include <thrust/transform_reduce.h>
#endif
namespace hpc {
/// Sequential transform-reduce for the `local_policy` overload.
///
/// Walks `range` from `begin()` to `end()`, applies `unary_op` to each dereferenced
/// element and folds the result into the running value via
/// `binary_op(accumulator, transformed)`.
///
/// @param range      Source range; only its `begin()` and `end()` are used.
/// @param init       Initial accumulator value; returned as-is when the range is empty.
/// @param binary_op  Combines the accumulator (passed as an rvalue) with a transformed element.
/// @param unary_op   Transformation applied to each dereferenced element.
/// @return The accumulator after the last element has been folded in.
///
/// Declared `noexcept`; the callables are therefore expected not to throw.
template <class Range, class T, class BinaryOp, class UnaryOp>
HPC_ALWAYS_INLINE HPC_HOST_DEVICE T
transform_reduce(local_policy, Range const& range, T init, BinaryOp binary_op, UnaryOp unary_op) noexcept
{
  auto       cursor = range.begin();
  auto const stop   = range.end();
  while (cursor != stop) {
    // The accumulator is moved into the combiner and then overwritten with its result.
    init = binary_op(std::move(init), unary_op(*cursor));
    ++cursor;
  }
  return init;
}
/// Sequential transform-reduce for the `serial_policy` overload.
///
/// Performs a left-to-right fold over `range`: each element is passed through
/// `unary_op`, and the result is merged into the accumulator with `binary_op`.
///
/// @param range      Source range; only its `begin()` and `end()` are used.
/// @param init       Initial accumulator value; returned as-is when the range is empty.
/// @param binary_op  Called as `binary_op(std::move(accumulator), transformed)`.
/// @param unary_op   Called as `unary_op(*it)` for every position in the range.
/// @return The accumulator after the whole range has been processed.
template <class Range, class T, class BinaryOp, class UnaryOp>
HPC_NOINLINE T
transform_reduce(serial_policy, Range const& range, T init, BinaryOp binary_op, UnaryOp unary_op)
{
  auto       it     = range.begin();
  auto const end_it = range.end();
  while (it != end_it) {
    // Move the previous accumulator into the combiner; its result becomes the new accumulator.
    init = binary_op(std::move(init), unary_op(*it));
    ++it;
  }
  return init;
}
#ifdef HPC_CUDA
namespace impl {
/// Generic CUDA back end: hands an arbitrary iterator pair straight to Thrust.
///
/// Note the argument order changes on the way through: this function takes
/// `(init, binary_op, unary_op)` while `thrust::transform_reduce` expects
/// `(unary_op, init, binary_op)`.
///
/// @param first,last  Iterator pair delimiting the input.
/// @param init        Initial value of the reduction.
/// @param binary_op   Reduction operator.
/// @param unary_op    Transformation applied to each element before reduction.
/// @return Whatever `thrust::transform_reduce` returns for these arguments.
template <class Iterator, class T, class BinaryOp, class UnaryOp>
T
transform_reduce(cuda_policy, Iterator first, Iterator last, T init, BinaryOp binary_op, UnaryOp unary_op)
{
  // Run with Thrust's `device` execution policy object.
  auto const& device_exec = ::thrust::device;
  return ::thrust::transform_reduce(device_exec, first, last, unary_op, init, binary_op);
}
/// CUDA back end for ranges delimited by `hpc::counting_iterator`.
///
/// Only the distance `last - first` is taken from the hpc iterators. The reduction itself
/// runs over a Thrust counting sequence of `int` values `0 .. n-1`, so `unary_op` is
/// invoked with those `int` values.
///
/// NOTE(review): the starting value of `first` is not forwarded, so this appears to assume
/// the hpc range starts at zero — confirm against callers.
/// NOTE(review): the distance is converted to `int`; confirm that ranges exceeding the
/// range of `int` cannot occur for the index types in use.
///
/// @param first,last  hpc counting iterators; used only to compute the element count.
/// @param init        Initial value of the reduction.
/// @param binary_op   Reduction operator.
/// @param unary_op    Transformation applied to each counted value before reduction.
/// @return Whatever `thrust::transform_reduce` returns for these arguments.
template <class Index, class T, class BinaryOp, class UnaryOp>
T
transform_reduce(
    cuda_policy,
    ::hpc::counting_iterator<Index> first,
    ::hpc::counting_iterator<Index> last,
    T                               init,
    BinaryOp                        binary_op,
    UnaryOp                         unary_op)
{
  int const n = int(last - first);
  // Fully qualified like every other Thrust reference in this file, so the name cannot be
  // captured by a `thrust` namespace nested inside `hpc` or `hpc::impl`.
  ::thrust::counting_iterator<int> new_first(0);
  ::thrust::counting_iterator<int> new_last(n);
  return ::thrust::transform_reduce(::thrust::device, new_first, new_last, unary_op, init, binary_op);
}
/// CUDA back end for ranges delimited by `hpc::pointer_iterator`.
///
/// Unwraps the hpc iterators into a raw `TStored*` pair (address of the first element plus
/// the element count) and passes that pair to `thrust::transform_reduce` with the `device`
/// execution policy.
///
/// @param first,last  hpc pointer iterators delimiting the input.
/// @param init        Initial value of the reduction.
/// @param binary_op   Reduction operator.
/// @param unary_op    Transformation applied to each element before reduction.
/// @return Whatever `thrust::transform_reduce` returns for these arguments.
template <class TStored, class TResult, class Index, class BinaryOp, class UnaryOp>
TResult
transform_reduce(
    cuda_policy,
    ::hpc::pointer_iterator<TStored, Index> first,
    ::hpc::pointer_iterator<TStored, Index> last,
    TResult                                 init,
    BinaryOp                                binary_op,
    UnaryOp                                 unary_op)
{
  auto const size = std::size_t(last - first);
  // Only dereference `first` when there is an element to refer to. For an empty range the
  // iterator may not designate an object, and forming `*first` would be undefined behaviour.
  // A null pointer plus zero is still a valid (empty) pointer range for Thrust.
  TStored* const new_first = (size == 0) ? nullptr : &(*first);
  TStored* const new_last  = new_first + size;
  return ::thrust::transform_reduce(::thrust::device, new_first, new_last, unary_op, init, binary_op);
}
} // namespace impl
/// Transform-reduce entry point for the `cuda_policy` overload.
///
/// Extracts the iterator pair from `range` and forwards everything to
/// `hpc::impl::transform_reduce`, whose overloads are selected by iterator type.
///
/// @param policy     CUDA policy object, passed through unchanged.
/// @param range      Source range; only its `begin()` and `end()` are used.
/// @param init       Initial value of the reduction.
/// @param binary_op  Reduction operator.
/// @param unary_op   Transformation applied to each element before reduction.
/// @return The value produced by the selected `hpc::impl::transform_reduce` overload.
template <class Range, class T, class BinaryOp, class UnaryOp>
HPC_NOINLINE T
transform_reduce(cuda_policy policy, Range const& range, T init, BinaryOp binary_op, UnaryOp unary_op)
{
  auto const range_first = range.begin();
  auto const range_last  = range.end();
  return ::hpc::impl::transform_reduce(policy, range_first, range_last, init, binary_op, unary_op);
}
#endif
} // namespace hpc