From 9b8ad2e70bc68ace228d572d47a74681c07fe4ae Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 16 Nov 2023 14:25:34 -0800 Subject: [PATCH] Fix a bug: sync before return --- .../detail/open_addressing/open_addressing_ref_impl.cuh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh b/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh index 1c75e0c2c..26e5a055a 100644 --- a/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh +++ b/include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh @@ -466,8 +466,9 @@ class open_addressing_ref_impl { #if __CUDA_ARCH__ < 700 // Spinning to ensure that the write to the value part took place requires // independent thread scheduling introduced with the Volta architecture. - static_assert(cuco::detail::is_packable<value_type>(), - "insert_and_find is not supported for unpackable data on pre-Volta GPUs."); + static_assert( + cuco::detail::is_packable<value_type>(), + "insert_and_find is not supported for pair types larger than 8 bytes on pre-Volta GPUs."); #endif auto const key = this->extract_key(value); @@ -510,6 +511,7 @@ class open_addressing_ref_impl { this->wait_for_payload(slot_ptr->second, this->empty_slot_sentinel_.second); } } + group.sync(); return {iterator{reinterpret_cast<value_type*>(res)}, false}; } @@ -535,6 +537,7 @@ class open_addressing_ref_impl { this->wait_for_payload(slot_ptr->second, this->empty_slot_sentinel_.second); } } + group.sync(); return {iterator{reinterpret_cast<value_type*>(res)}, true}; } case insert_result::DUPLICATE: { @@ -544,6 +547,7 @@ class open_addressing_ref_impl { this->wait_for_payload(slot_ptr->second, this->empty_slot_sentinel_.second); } } + group.sync(); return {iterator{reinterpret_cast<value_type*>(res)}, false}; } default: continue;