Skip to content

Commit

Permalink
Minor
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 committed Oct 26, 2024
1 parent e389a7b commit 8c536bd
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 21 deletions.
2 changes: 1 addition & 1 deletion examples/bloom_filter/host_bulk_example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ int main(void)
bloom_filter<key_type, cuco::extent<size_t>, cuda::thread_scope_device, arrow_policy_type>;

// Spawn a bloom filter with arrow policy and 200 sub-filters.
arrow_policy_filter_type filter_arrow_policy{sub_filters, {}, arrow_policy_type{sub_filters}};
arrow_policy_filter_type filter_arrow_policy{sub_filters};

// bulk insert to the bloom filter and evaluate
std::cout << "Bulk insert and evaluate bloom filter with arrow policy: " << std::endl;
Expand Down
34 changes: 16 additions & 18 deletions include/cuco/detail/bloom_filter/arrow_filter_policy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cuco/detail/error.hpp>
#include <cuco/hash_functions.cuh>

#include <cuda/std/__algorithm_>
#include <cuda/std/bit>
#include <cuda/std/limits>
#include <cuda/std/tuple>
Expand Down Expand Up @@ -50,30 +51,20 @@ class arrow_filter_policy {
32; ///< Number of bytes in one Arrow filter block
static constexpr std::uint32_t max_arrow_filter_bytes =
128 * 1024 * 1024; ///< Max bytes in Arrow bloom filter

static constexpr std::uint32_t max_filter_blocks =
(max_arrow_filter_bytes /
bytes_per_filter_block); ///< Max sub-filter blocks allowed in Arrow bloom filter
/**
* @brief Constructs the `arrow_filter_policy` object.
*
* @throws If number of filter blocks (`num_blocks`) is smaller than 1
* or larger than 4194304. If called from host: throws exception;
* If called from device: Traps the kernel.
* @note The number of filter blocks with Arrow policy must be in the
* range of [1, 4194304]. If the bloom filter is constructed with a larger
* number of blocks, only the first 4194304 (128MB) blocks will be used.
*
* @param num_blocks Number of bloom filter blocks
* @param hash Hash function used to generate a key's fingerprint
*/
__host__ __device__ constexpr arrow_filter_policy(std::uint32_t num_blocks, hasher hash = {})
: hash_{hash}
{
NV_DISPATCH_TARGET(
NV_IS_HOST,
(CUCO_EXPECTS(
num_blocks >= 1 and num_blocks <= (max_arrow_filter_bytes / bytes_per_filter_block),
"The `num_blocks` in Arrow filter policy must be in the range of [1, 4194304]");),
NV_IS_DEVICE,
(if (num_blocks < 1 or num_blocks > (max_arrow_filter_bytes / bytes_per_filter_block)) {
__trap(); // TODO this kills the kernel and corrupts the CUDA context. Not ideal.
}));
}
__host__ __device__ constexpr arrow_filter_policy(hasher hash = {}) : hash_{hash} {}

/**
* @brief Generates the hash value for a given key.
Expand All @@ -90,6 +81,10 @@ class arrow_filter_policy {
/**
* @brief Determines the filter block a key is added into.
*
* @note The number of filter blocks with Arrow policy must be in the
* range of [1, 4194304]. If a larger `num_blocks` is passed, the number
* of blocks actually used is clamped to the upper bound of that range.
*
* @tparam Extent Size type that is used to determine the number of blocks in the filter
*
* @param hash Hash value of the key
Expand All @@ -101,7 +96,10 @@ class arrow_filter_policy {
__device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const
{
constexpr auto hash_bits = cuda::std::numeric_limits<word_type>::digits;
return static_cast<word_type>(((hash >> hash_bits) * num_blocks) >> hash_bits);
// TODO: assert if num_blocks > max_filter_blocks
auto const max_blocks = cuda::std::min<Extent>(num_blocks, max_filter_blocks);
// Make sure the returned index stays within the first `max_filter_blocks` blocks
return static_cast<word_type>(((hash >> hash_bits) * max_blocks) >> hash_bits) % max_blocks;
}

/**
Expand Down
4 changes: 2 additions & 2 deletions tests/bloom_filter/unique_sequence_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void test_unique_sequence(Filter& filter, size_type num_keys)
REQUIRE(cuco::test::all_of(contained.begin(), contained.end(), thrust::identity{}));
}

SECTION("After clearing the flter no keys should be contained.")
SECTION("After clearing the filter no keys should be contained.")
{
filter.clear();
filter.contains(keys.begin(), keys.end(), contained.begin());
Expand Down Expand Up @@ -116,7 +116,7 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence with arrow policy",
cuco::bloom_filter<Key, cuco::extent<size_t>, cuda::thread_scope_device, Policy>;
constexpr size_type num_keys{400};

auto filter = filter_type{1000, {}, {1000}};
auto filter = filter_type{1000};

test_unique_sequence(filter, num_keys);
}

0 comments on commit 8c536bd

Please sign in to comment.