Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/dev' into add-map-retrieve
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel committed Dec 3, 2024
2 parents 6b5f7fb + 759b710 commit 1bad268
Show file tree
Hide file tree
Showing 14 changed files with 178 additions and 44 deletions.
8 changes: 6 additions & 2 deletions include/cuco/bloom_filter_policies.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <cuco/detail/bloom_filter/arrow_filter_policy.cuh>
#include <cuco/detail/bloom_filter/default_filter_policy_impl.cuh>
#include <cuco/hash_functions.cuh>

#include <cstdint>

Expand All @@ -28,9 +29,12 @@ namespace cuco {
* fingerprint.
*
* @tparam Key The type of the values to generate a fingerprint for.
* @tparam XXHash64 Custom (64 bit) XXHash hasher to generate a key's fingerprint.
* By default, cuco::xxhash_64 hasher will be used.
*
*/
template <class Key>
using arrow_filter_policy = detail::arrow_filter_policy<Key>;
template <class Key, class XXHash64 = cuco::xxhash_64<Key>>
using arrow_filter_policy = detail::arrow_filter_policy<Key, XXHash64>;

/**
* @brief The default policy that defines how a Blocked Bloom Filter generates and stores a key's
Expand Down
12 changes: 7 additions & 5 deletions include/cuco/detail/bloom_filter/arrow_filter_policy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,15 @@ namespace cuco::detail {
* void bulk_insert_and_eval_arrow_policy_bloom_filter(device_vector<KeyType> const& positive_keys,
* device_vector<KeyType> const& negative_keys)
* {
* using policy_type = cuco::arrow_filter_policy<key_type>;
* using xxhash_64 = cuco::xxhash_64<KeyType>;
* using policy_type = cuco::arrow_filter_policy<KeyType, xxhash_64>;
*
* // Warn or throw if the number of filter blocks is greater than maximum used by Arrow policy.
* static_assert(NUM_FILTER_BLOCKS <= policy_type::max_filter_blocks, "NUM_FILTER_BLOCKS must be
* in range: [1, 4194304]");
*
* // Create a bloom filter with Arrow policy
* cuco::bloom_filter<key_type, cuco::extent<size_t>,
* cuco::bloom_filter<KeyType, cuco::extent<size_t>,
* cuda::thread_scope_device, policy_type> filter{NUM_FILTER_BLOCKS};
*
* // Add positive keys to the bloom filter
Expand Down Expand Up @@ -78,12 +79,13 @@ namespace cuco::detail {
* @endcode
*
* @tparam Key The type of the values to generate a fingerprint for.
* @tparam XXHash64 64-bit XXHash hasher implementation for fingerprint generation.
*/
template <class Key>
template <class Key, class XXHash64>
class arrow_filter_policy {
public:
using hasher = cuco::xxhash_64<Key>; ///< xxhash_64 hasher for Arrow bloom filter policy
using word_type = std::uint32_t; ///< uint32_t for Arrow bloom filter policy
using hasher = XXHash64; ///< 64-bit XXHash hasher for Arrow bloom filter policy
using word_type = std::uint32_t; ///< uint32_t for Arrow bloom filter policy
using hash_argument_type = typename hasher::argument_type; ///< Hash function input type
using hash_result_type = decltype(std::declval<hasher>()(
std::declval<hash_argument_type>())); ///< hash function output type
Expand Down
9 changes: 5 additions & 4 deletions include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class bloom_filter_impl {
auto const grid_size =
cuco::detail::grid_size(num_keys, cg_size, cuco::detail::default_stride(), block_size);

detail::add_if_n<cg_size, block_size>
detail::bloom_filter_ns::add_if_n<cg_size, block_size>
<<<grid_size, block_size, 0, stream.get()>>>(first, num_keys, stencil, pred, *this);
}

Expand Down Expand Up @@ -303,8 +303,9 @@ class bloom_filter_impl {
auto const grid_size =
cuco::detail::grid_size(num_keys, cg_size, cuco::detail::default_stride(), block_size);

detail::contains_if_n<cg_size, block_size><<<grid_size, block_size, 0, stream.get()>>>(
first, num_keys, stencil, pred, output_begin, *this);
detail::bloom_filter_ns::contains_if_n<cg_size, block_size>
<<<grid_size, block_size, 0, stream.get()>>>(
first, num_keys, stencil, pred, output_begin, *this);
}

[[nodiscard]] __host__ __device__ constexpr word_type* data() noexcept { return words_; }
Expand Down Expand Up @@ -365,4 +366,4 @@ class bloom_filter_impl {
policy_type policy_;
};

} // namespace cuco::detail
} // namespace cuco::detail
4 changes: 2 additions & 2 deletions include/cuco/detail/bloom_filter/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cstdint>
#include <iterator>

namespace cuco::detail {
namespace cuco::detail::bloom_filter_ns {

CUCO_SUPPRESS_KERNEL_WARNINGS

Expand Down Expand Up @@ -89,4 +89,4 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void contains_if_n(InputIt first,
}
}

} // namespace cuco::detail
} // namespace cuco::detail::bloom_filter_ns
4 changes: 2 additions & 2 deletions include/cuco/detail/open_addressing/functors.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <cuco/detail/bitwise_compare.cuh>
#include <cuco/detail/pair/traits.hpp>

namespace cuco::open_addressing_ns::detail {
namespace cuco::detail::open_addressing_ns {

/**
* @brief Device functor returning the content of the slot indexed by `idx`
Expand Down Expand Up @@ -107,4 +107,4 @@ struct slot_is_filled {
}
};

} // namespace cuco::open_addressing_ns::detail
} // namespace cuco::detail::open_addressing_ns
4 changes: 2 additions & 2 deletions include/cuco/detail/open_addressing/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

#include <iterator>

namespace cuco::detail {
namespace cuco::detail::open_addressing_ns {
CUCO_SUPPRESS_KERNEL_WARNINGS

/**
Expand Down Expand Up @@ -729,4 +729,4 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void rehash(
}
}

} // namespace cuco::detail
} // namespace cuco::detail::open_addressing_ns
35 changes: 18 additions & 17 deletions include/cuco/detail/open_addressing/open_addressing_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::insert_if_n<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::insert_if_n<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, stencil, pred, counter.data(), container_ref);

Expand Down Expand Up @@ -384,7 +384,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::insert_if_n<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::insert_if_n<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, stencil, pred, container_ref);
}
Expand Down Expand Up @@ -426,7 +426,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::insert_and_find<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::insert_and_find<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, found_begin, inserted_begin, container_ref);
}
Expand Down Expand Up @@ -466,7 +466,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::erase<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::erase<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, container_ref);
}
Expand Down Expand Up @@ -540,7 +540,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::contains_if_n<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::contains_if_n<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, stencil, pred, output_begin, container_ref);
}
Expand Down Expand Up @@ -615,7 +615,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::find_if_n<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::find_if_n<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, stencil, pred, output_begin, container_ref);
}
Expand Down Expand Up @@ -789,8 +789,8 @@ class open_addressing_impl {
std::min(static_cast<cuco::detail::index_type>(this->capacity()) - offset, stride);
auto const begin = thrust::make_transform_iterator(
thrust::counting_iterator{static_cast<size_type>(offset)},
open_addressing_ns::detail::get_slot<has_payload, storage_ref_type>(this->storage_ref()));
auto const is_filled = open_addressing_ns::detail::slot_is_filled<has_payload, key_type>{
detail::open_addressing_ns::get_slot<has_payload, storage_ref_type>(this->storage_ref()));
auto const is_filled = detail::open_addressing_ns::slot_is_filled<has_payload, key_type>{
this->empty_key_sentinel(), this->erased_key_sentinel()};

std::size_t temp_storage_bytes = 0;
Expand Down Expand Up @@ -844,7 +844,7 @@ class open_addressing_impl {
template <typename CallbackOp>
void for_each_async(CallbackOp&& callback_op, cuda::stream_ref stream) const
{
auto const is_filled = open_addressing_ns::detail::slot_is_filled<has_payload, key_type>{
auto const is_filled = detail::open_addressing_ns::slot_is_filled<has_payload, key_type>{
this->empty_key_sentinel(), this->erased_key_sentinel()};

auto storage_ref = this->storage_ref();
Expand Down Expand Up @@ -886,7 +886,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::for_each_n<cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::for_each_n<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, std::forward<CallbackOp>(callback_op), container_ref);
}
Expand All @@ -907,12 +907,12 @@ class open_addressing_impl {
counter.reset(stream);

auto const grid_size = cuco::detail::grid_size(storage_.num_buckets());
auto const is_filled = open_addressing_ns::detail::slot_is_filled<has_payload, key_type>{
auto const is_filled = detail::open_addressing_ns::slot_is_filled<has_payload, key_type>{
this->empty_key_sentinel(), this->erased_key_sentinel()};

// TODO: custom kernel to be replaced by cub::DeviceReduce::Sum when cub version is bumped to
// v2.1.0
detail::size<cuco::detail::default_block_size()>
detail::open_addressing_ns::size<cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
storage_.ref(), is_filled, counter.data());

Expand Down Expand Up @@ -1014,10 +1014,10 @@ class open_addressing_impl {
auto constexpr block_size = cuco::detail::default_block_size();
auto constexpr stride = cuco::detail::default_stride();
auto const grid_size = cuco::detail::grid_size(num_buckets, 1, stride, block_size);
auto const is_filled = open_addressing_ns::detail::slot_is_filled<has_payload, key_type>{
auto const is_filled = detail::open_addressing_ns::slot_is_filled<has_payload, key_type>{
this->empty_key_sentinel(), this->erased_key_sentinel()};

detail::rehash<block_size><<<grid_size, block_size, 0, stream.get()>>>(
detail::open_addressing_ns::rehash<block_size><<<grid_size, block_size, 0, stream.get()>>>(
old_storage.ref(), container.ref(op::insert), is_filled);
}

Expand Down Expand Up @@ -1120,7 +1120,7 @@ class open_addressing_impl {

auto const grid_size = cuco::detail::grid_size(num_keys, cg_size);

detail::count<IsOuter, cg_size, cuco::detail::default_block_size()>
detail::open_addressing_ns::count<IsOuter, cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num_keys, counter.data(), container_ref);

Expand Down Expand Up @@ -1180,8 +1180,9 @@ class open_addressing_impl {
auto constexpr grid_stride = 1;
auto const grid_size = cuco::detail::grid_size(n, cg_size, grid_stride, block_size);

detail::retrieve<IsOuter, block_size><<<grid_size, block_size, 0, stream.get()>>>(
first, n, output_probe, output_match, counter.data(), container_ref);
detail::open_addressing_ns::retrieve<IsOuter, block_size>
<<<grid_size, block_size, 0, stream.get()>>>(
first, n, output_probe, output_match, counter.data(), container_ref);

auto const num_retrieved = counter.load_to_host(stream.get());

Expand Down
4 changes: 2 additions & 2 deletions include/cuco/detail/static_map/helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <cuco/detail/static_map/kernels.cuh>
#include <cuco/detail/utility/cuda.cuh>

namespace cuco::static_map_ns::detail {
namespace cuco::detail::static_map_ns {

/**
* @brief Dispatches to shared memory map kernel if `num_elements_per_thread > 2`, else
Expand Down Expand Up @@ -112,4 +112,4 @@ void dispatch_insert_or_apply(
first, num, init, op, ref);
}
}
} // namespace cuco::static_map_ns::detail
} // namespace cuco::detail::static_map_ns
4 changes: 2 additions & 2 deletions include/cuco/detail/static_map/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

#include <iterator>

namespace cuco::static_map_ns::detail {
namespace cuco::detail::static_map_ns {
CUCO_SUPPRESS_KERNEL_WARNINGS

// TODO user insert_or_assign internally
Expand Down Expand Up @@ -262,4 +262,4 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem(
}
}
}
} // namespace cuco::static_map_ns::detail
} // namespace cuco::detail::static_map_ns
6 changes: 3 additions & 3 deletions include/cuco/detail/static_map/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora

auto const grid_size = cuco::detail::grid_size(num, cg_size);

static_map_ns::detail::insert_or_assign<cg_size, cuco::detail::default_block_size()>
detail::static_map_ns::insert_or_assign<cg_size, cuco::detail::default_block_size()>
<<<grid_size, cuco::detail::default_block_size(), 0, stream.get()>>>(
first, num, ref(op::insert_or_assign));
}
Expand Down Expand Up @@ -335,7 +335,7 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
{
auto constexpr has_init = false;
auto const init = this->empty_value_sentinel(); // use empty_sentinel as unused init value
static_map_ns::detail::dispatch_insert_or_apply<has_init, cg_size, Allocator>(
detail::static_map_ns::dispatch_insert_or_apply<has_init, cg_size, Allocator>(
first, last, init, op, ref(op::insert_or_apply), stream);
}

Expand All @@ -353,7 +353,7 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
InputIt first, InputIt last, Init init, Op op, cuda::stream_ref stream) noexcept
{
auto constexpr has_init = true;
static_map_ns::detail::dispatch_insert_or_apply<has_init, cg_size, Allocator>(
detail::static_map_ns::dispatch_insert_or_apply<has_init, cg_size, Allocator>(
first, last, init, op, ref(op::insert_or_apply), stream);
}

Expand Down
55 changes: 55 additions & 0 deletions include/cuco/detail/static_set/static_set.inl
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,29 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>
impl_->find_async(first, last, output_begin, ref(op::find), stream);
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
template <typename InputIt, typename ProbeEqual, typename ProbeHash, typename OutputIt>
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::find_async(
InputIt first,
InputIt last,
ProbeEqual const& probe_equal,
ProbeHash const& probe_hash,
OutputIt output_begin,
cuda::stream_ref stream) const
{
impl_->find_async(first,
last,
output_begin,
ref(op::find).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash),
stream);
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
Expand Down Expand Up @@ -376,6 +399,38 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>
impl_->find_if_async(first, last, stencil, pred, output_begin, ref(op::find), stream);
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
template <typename InputIt,
typename StencilIt,
typename Predicate,
typename ProbeEqual,
typename ProbeHash,
typename OutputIt>
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::find_if_async(
InputIt first,
InputIt last,
StencilIt stencil,
Predicate pred,
ProbeEqual const& probe_equal,
ProbeHash const& probe_hash,
OutputIt output_begin,
cuda::stream_ref stream) const
{
impl_->find_if_async(first,
last,
stencil,
pred,
output_begin,
ref(op::find).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash),
stream);
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
Expand Down
Loading

0 comments on commit 1bad268

Please sign in to comment.