From 21fa5b056532664699fe303bb4a9883a0d48b119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20J=C3=BCnger?= Date: Sat, 10 Aug 2024 01:46:36 +0000 Subject: [PATCH] Use scalar add(...) for window_size=1 --- include/cuco/detail/bloom_filter/kernels.cuh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/cuco/detail/bloom_filter/kernels.cuh b/include/cuco/detail/bloom_filter/kernels.cuh index b64d97053..875600ccb 100644 --- a/include/cuco/detail/bloom_filter/kernels.cuh +++ b/include/cuco/detail/bloom_filter/kernels.cuh @@ -35,13 +35,17 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void add_if_n( auto const loop_stride = cuco::detail::grid_stride() / window_size; auto idx = cuco::detail::global_thread_id() / window_size; - auto const tile = + [[maybe_unused]] auto const tile = cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); while (idx < n) { if (pred(*(stencil + idx))) { typename std::iterator_traits::value_type const& insert_element{*(first + idx)}; - ref.add(tile, insert_element); + if constexpr (window_size == 1) { + ref.add(insert_element); + } else { + ref.add(tile, insert_element); + } } idx += loop_stride; }