Skip to content

Commit

Permalink
[NFC][SYCL][Reduction] Use different workaround than #7484 (#7500)
Browse files Browse the repository at this point in the history
This reverts #7484 and uses a less intrusive workaround for the gcc bug.
  • Loading branch information
aelovikov-intel authored Nov 23, 2022
1 parent ccd1639 commit 3667cf3
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions sycl/include/sycl/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2314,15 +2314,17 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,

// Before running the kernels, check that the device has enough local memory
// to hold the local arrays required for the tree-reduction algorithm.
size_t OneElemSize;
if constexpr (NumArgs == 2) {
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
constexpr bool IsTreeReduction =
!Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
OneElemSize = IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
} else {
OneElemSize = reduGetMemPerWorkItem(ReduTuple, ReduIndices);
}
size_t OneElemSize = [&]() {
// Can't use the NumArgs constant defined outside this lambda due to a bug
// in gcc 8.4, so the pack size is spelled out as sizeof...(RestT) instead.
if constexpr (sizeof...(RestT) == 2) {
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
constexpr bool IsTreeReduction =
!Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
return IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
} else {
return reduGetMemPerWorkItem(ReduTuple, ReduIndices);
}
}();

uint32_t NumConcurrentWorkGroups =
#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS
Expand Down

0 comments on commit 3667cf3

Please sign in to comment.