From d2f04ff2e95bf04c4f0db2c3f6f3a97c744e5c4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20J=C3=BCnger?=
Date: Wed, 18 Sep 2024 17:40:17 +0000
Subject: [PATCH] Set maximum required alignment to 16 bytes

required_alignment() used to return sizeof(word_type) * words_per_block,
with a TODO asking whether a 16-byte maximum is sufficient. Introduce
max_vec_bytes() (16 bytes: LDG128 is the widest load we can perform) and
clamp required_alignment() to it with cuda::std::min, so the value is
still sizeof(word_type) * words_per_block for small blocks but never
exceeds 16 bytes.

---
NOTE(review): the clamp must be cuda::std::min, not cuda::std::max; max
would make 16 bytes the *minimum* alignment and leave large blocks
uncapped, which contradicts the subject line.
NOTE(review): in this copy of the patch the #include targets are missing
and the first hunk shows one context line fewer than its header declares
(presumably a blank line whose position cannot be recovered). The blob id
on the index line predates the max -> min correction. Regenerate the patch
from the source tree before applying it.

 include/cuco/detail/bloom_filter/bloom_filter_impl.cuh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
index 351ef404c..28a7bcbfb 100644
--- a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
+++ b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
@@ -22,8 +22,10 @@
 #include
 #include
+// TODO #include once available
 #include
 #include
+#include
 #include
 #include
 #include
@@ -277,9 +279,14 @@ class bloom_filter_impl {
       words_ + index, min(sizeof(word_type) * NumWords, required_alignment())));
   }
 
+  __host__ __device__ static constexpr size_t max_vec_bytes() noexcept
+  {
+    return 16;  // LDG128 is the widest load we can perform
+  }
+
   __host__ __device__ static constexpr size_t required_alignment() noexcept
   {
-    return sizeof(word_type) * words_per_block;  // TODO check if a maximum of 16byte is sufficient
+    return cuda::std::min(sizeof(word_type) * words_per_block, max_vec_bytes());
   }
 
   word_type* words_;