diff --git a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
index 351ef404c..28a7bcbfb 100644
--- a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
+++ b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
@@ -22,8 +22,10 @@
 #include
 
 #include
+// TODO #include once available
 #include
 #include
+#include
 #include
 #include
 #include
@@ -277,9 +279,27 @@ class bloom_filter_impl {
       words_ + index, min(sizeof(word_type) * NumWords, required_alignment())));
   }
 
+  /**
+   * @brief Upper bound, in bytes, on the width of a single vectorized load.
+   *
+   * @return 16, the size of a 128-bit load (LDG128)
+   */
+  __host__ __device__ static constexpr size_t max_vec_bytes() noexcept
+  {
+    return 16;  // LDG128 is the widest load we can perform
+  }
+
+  /**
+   * @brief Alignment, in bytes, that the storage of a filter block must satisfy.
+   *
+   * A block never has to be aligned more strictly than the widest load that can read it, so the
+   * block size in bytes is capped at `max_vec_bytes()`.
+   *
+   * @return The smaller of the block size in bytes and `max_vec_bytes()`
+   */
   __host__ __device__ static constexpr size_t required_alignment() noexcept
   {
-    return sizeof(word_type) * words_per_block;  // TODO check if a maximum of 16byte is sufficient
+    return cuda::std::min(sizeof(word_type) * words_per_block, max_vec_bytes());
   }
 
   word_type* words_;