Skip to content

Commit

Permalink
Migrate static map examples (#407)
Browse files Browse the repository at this point in the history
This PR migrates all single map examples to use the new experimental
map. It also fixes a bug where bitwise compare should be used to compare
sentinel values.
  • Loading branch information
PointKernel authored Dec 19, 2023
1 parent ffd3b99 commit d12da95
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 90 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
`cuco::static_map` is a fixed-size hash table using open addressing with linear probing. See the Doxygen documentation in `static_map.cuh` for more detailed information.

#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/T49P85Mnd))
- [Device-view APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_view_example.cu) (see [live example in godbolt](https://godbolt.org/z/dh8bMn3G1))
- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/7djKevK6e))
- [Key histogram](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/count_by_key_example.cu) (see [live example in godbolt](https://godbolt.org/z/vecGeYM48))
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/7jK9od6bx))
- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/W338MePdW))
- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/YYb1WE9od))
- [Key histogram](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/count_by_key_example.cu) (see [live example in godbolt](https://godbolt.org/z/6rz7MYoMe))

### `static_multimap`

Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ConfigureExample(STATIC_SET_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/stati
ConfigureExample(STATIC_SET_DEVICE_REF_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_set/device_ref_example.cu")
ConfigureExample(STATIC_SET_DEVICE_SUBSETS_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_set/device_subsets_example.cu")
ConfigureExample(STATIC_MAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/host_bulk_example.cu")
ConfigureExample(STATIC_MAP_DEVICE_SIDE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/device_view_example.cu")
ConfigureExample(STATIC_MAP_DEVICE_SIDE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/device_ref_example.cu")
ConfigureExample(STATIC_MAP_CUSTOM_TYPE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/custom_type_example.cu")
ConfigureExample(STATIC_MAP_COUNT_BY_KEY_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/count_by_key_example.cu")
ConfigureExample(STATIC_MULTIMAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_multimap/host_bulk_example.cu")
46 changes: 25 additions & 21 deletions examples/static_map/count_by_key_example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* the context of a count-by-key operation, i.e. for a histogram over keys.
*
* Individual operations like a single insert or find can be performed in device code via the
* static_map "device_view" types.
* "static_map_ref" types.
*
* @note This example is for demonstration purposes only. It is not intended to show the most
* performant way to do the example algorithm.
Expand All @@ -47,17 +47,17 @@
* @brief Inserts keys and counts how often they occur in the input sequence.
*
* @tparam BlockSize CUDA block size
* @tparam Map Type of the map returned from static_map::get_device_mutable_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
* @tparam UniqueIter Output iterator whose value_type is convertible to uint64_t
*
* @param[in] map_view View of the map into which inserts will be performed
* @param[in] map_ref Reference of the map into which inserts will be performed
* @param[in] key_begin The beginning of the range of keys to insert
* @param[in] num_keys The total number of keys and values
* @param[out] num_unique_keys The total number of distinct keys inserted
*/
template <int64_t BlockSize, typename Map, typename KeyIter, typename UniqueIter>
__global__ void count_by_key(Map map_view,
__global__ void count_by_key(Map map_ref,
KeyIter keys,
uint64_t num_keys,
UniqueIter num_unique_keys)
Expand All @@ -71,13 +71,14 @@ __global__ void count_by_key(Map map_view,
uint64_t thread_unique_keys = 0;
while (idx < num_keys) {
// insert key into the map with a count of 1
auto [slot, is_new_key] = map_view.insert_and_find({keys[idx], 1});
auto [slot, is_new_key] = map_ref.insert_and_find(cuco::pair{keys[idx], 1});
if (is_new_key) {
// first occurrence of the key
thread_unique_keys++;
} else {
// key is already in the map -> increment count
slot->second.fetch_add(1, cuda::memory_order_relaxed);
auto ref = cuda::atomic_ref<uint32_t, cuda::thread_scope_device>{slot->second};
ref.fetch_add(1, cuda::memory_order_relaxed);
}
idx += loop_stride;
}
Expand All @@ -101,7 +102,7 @@ int main(void)
// Empty slots are represented by reserved "sentinel" values. These values should be selected such
// that they never occur in your input data.
Key constexpr empty_key_sentinel = static_cast<Key>(-1);
Count constexpr empty_value_sentinel = static_cast<Key>(-1);
Count constexpr empty_value_sentinel = static_cast<Count>(-1);

// Number of keys to be inserted
auto constexpr num_keys = 50'000;
Expand All @@ -125,34 +126,37 @@ int main(void)
// Compute capacity based on a 50% load factor
auto constexpr load_factor = 0.5;

// If the number of unique keys is known in advance, we can use it to calculate the map capacity
std::size_t const capacity = std::ceil((num_keys / key_duplicates) / load_factor);
// If we can't give an estimated upper bound on the number of unique keys
// we conservatively assume each key in the input is distinct
// std::size_t const capacity = std::ceil(num_keys / load_factor);
// If the number of elements is known in advance, we can use it to calculate the map capacity
std::size_t const num_elements = num_keys / key_duplicates;

// Constructs a map with "capacity" slots.
cuco::static_map<Key, Count> map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
// Constructs a map with number of elements and desired load factor.
auto map = cuco::experimental::static_map{
num_elements,
load_factor,
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
thrust::equal_to<Key>{},
cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{}};

// Get a non-owning, mutable view of the map that allows inserts to pass by value into the kernel
auto device_insert_view = map.get_device_mutable_view();
// Get a non-owning, mutable reference of the map that allows `insert_and_find` operation to pass
// by value into the kernel
auto map_ref = map.ref(cuco::experimental::op::insert_and_find);

auto constexpr block_size = 256;
auto const grid_size = (num_keys + block_size - 1) / block_size;
count_by_key<block_size><<<grid_size, block_size>>>(
device_insert_view, insert_keys.begin(), num_keys, num_unique_keys.data());
count_by_key<block_size>
<<<grid_size, block_size>>>(map_ref, insert_keys.begin(), num_keys, num_unique_keys.data());

// Retrieve contents of all the non-empty slots in the map
thrust::device_vector<Key> result_keys(num_unique_keys[0]);
thrust::device_vector<Count> result_counts(num_unique_keys[0]);
map.retrieve_all(result_keys.begin(), result_counts.begin());

// Check if the number of result keys is correct
auto num_keys_check = num_unique_keys[0] == (num_keys / key_duplicates);
auto const num_keys_check = num_unique_keys[0] == (num_keys / key_duplicates);

// Iterate over all result counts and verify that they are correct
auto counts_check = thrust::all_of(
auto const counts_check = thrust::all_of(
result_counts.begin(), result_counts.end(), [] __host__ __device__(Count const count) {
return count == key_duplicates;
});
Expand Down
53 changes: 17 additions & 36 deletions examples/static_map/custom_type_example.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,32 +25,13 @@
#include <cuda/functional>

// User-defined key type
#if !defined(CUCO_HAS_INDEPENDENT_THREADS)
struct custom_key_type {
int32_t a;
int32_t b;

__host__ __device__ custom_key_type() {}
__host__ __device__ custom_key_type(int32_t x) : a{x}, b{x} {}
};
#else
// Key type larger than 8B only supported for sm_70 and up
struct custom_key_type {
int32_t a;
int32_t b;
int32_t c;

__host__ __device__ custom_key_type() {}
__host__ __device__ custom_key_type(int32_t x) : a{x}, b{x}, c{x} {}

// Device equality operator is mandatory due to libcudacxx bug:
// https://github.com/NVIDIA/libcudacxx/issues/223
__device__ bool operator==(custom_key_type const& other) const
{
return a == other.a and b == other.b and c == other.c;
}
};
#endif

// User-defined value type
struct custom_value_type {
Expand All @@ -63,17 +44,12 @@ struct custom_value_type {

// User-defined device hash callable
struct custom_hash {
template <typename key_type>
__device__ uint32_t operator()(key_type k)
{
return k.a;
};
__device__ uint32_t operator()(custom_key_type const& k) const noexcept { return k.a; };
};

// User-defined device key equal callable
struct custom_key_equals {
template <typename key_type>
__device__ bool operator()(key_type const& lhs, key_type const& rhs)
struct custom_key_equal {
__device__ bool operator()(custom_key_type const& lhs, custom_key_type const& rhs) const noexcept
{
return lhs.a == rhs.a;
}
Expand All @@ -91,15 +67,20 @@ int main(void)
auto pairs_begin = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0),
cuda::proclaim_return_type<cuco::pair<custom_key_type, custom_value_type>>(
[] __device__(auto i) { return cuco::make_pair(custom_key_type{i}, custom_value_type{i}); }));
[] __device__(auto i) {
return cuco::pair{custom_key_type{i}, custom_value_type{i}};
}));

// Construct a map with 100,000 slots using the given empty key/value sentinels. Note the
// capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%.
cuco::static_map<custom_key_type, custom_value_type> map{
100'000, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
auto map = cuco::experimental::static_map{cuco::experimental::extent<std::size_t, 100'000>{},
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
custom_key_equal{},
cuco::experimental::linear_probing<1, custom_hash>{}};

// Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable
map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{});
map.insert(pairs_begin, pairs_begin + num_pairs);

// Reproduce inserted keys
auto insert_keys =
Expand All @@ -111,14 +92,14 @@ int main(void)

// Determine if all the inserted keys can be found by using the same hasher and equality
// function as `insert`. If a key `insert_keys[i]` doesn't exist, `contained[i] == false`.
map.contains(
insert_keys, insert_keys + num_pairs, contained.begin(), custom_hash{}, custom_key_equals{});
map.contains(insert_keys, insert_keys + num_pairs, contained.begin());
// This will fail due to inconsistent hash and key equal.
// map.contains(insert_keys, insert_keys + num_pairs, contained.begin());

// All inserted keys are contained
assert(
thrust::all_of(contained.begin(), contained.end(), [] __device__(auto const& b) { return b; }));
auto const all_contained =
thrust::all_of(contained.begin(), contained.end(), [] __device__(auto const& b) { return b; });
if (all_contained) { std::cout << "Success! Found all values.\n"; }

return 0;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,12 +29,11 @@
#include <limits>

/**
* @file device_view_example.cu
* @file device_ref_example.cu
* @brief Demonstrates usage of the device side APIs for individual operations like insert/find.
*
* Individual operations like a single insert or find can be performed in device code via the
* static_map "device_view" types. Note that concurrent insert and find are not supported, and
* therefore there are separate view types for insert and find to help prevent undefined behavior.
* "static_map_ref" types.
*
* @note This example is for demonstration purposes only. It is not intended to show the most
* performant way to do the example algorithm.
Expand All @@ -44,12 +43,12 @@
/**
* @brief Inserts keys that pass the specified predicated into the map.
*
* @tparam Map Type of the map returned from static_map::get_device_mutable_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
* @tparam ValueIter Input iterator whose value_type is convertible to Map::mapped_type
* @tparam Predicate Unary predicate
*
* @param[in] map_view View of the map into which inserts will be performed
* @param[in] map_ref Reference of the map into which inserts will be performed
* @param[in] key_begin The beginning of the range of keys to insert
* @param[in] value_begin The beginning of the range of values associated with each key to insert
* @param[in] num_keys The total number of keys and values
Expand All @@ -58,7 +57,7 @@
* @param[out] num_inserted The total number of keys successfully inserted
*/
template <typename Map, typename KeyIter, typename ValueIter, typename Predicate>
__global__ void filtered_insert(Map map_view,
__global__ void filtered_insert(Map map_ref,
KeyIter key_begin,
ValueIter value_begin,
std::size_t num_keys,
Expand All @@ -71,9 +70,9 @@ __global__ void filtered_insert(Map map_view,
while (tid < num_keys) {
// Only insert keys that pass the predicate
if (pred(key_begin[tid])) {
// device_mutable_view::insert returns `true` if it is the first time the given key was
// Map::insert returns `true` if it is the first time the given key was
// inserted and `false` if the key already existed
if (map_view.insert({key_begin[tid], value_begin[tid]})) {
if (map_ref.insert(cuco::pair{key_begin[tid], value_begin[tid]})) {
++counter; // Count number of successfully inserted keys
}
}
Expand All @@ -87,25 +86,26 @@ __global__ void filtered_insert(Map map_view,
/**
* @brief For keys that have a match in the map, increments their corresponding value by one.
*
* @tparam Map Type of the map returned from static_map::get_device_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
*
* @param map_view View of the map into which queries will be performed
* @param map_ref Reference of the map into which queries will be performed
* @param key_begin The beginning of the range of keys to query
* @param num_keys The total number of keys
*/
template <typename Map, typename KeyIter>
__global__ void increment_values(Map map_view, KeyIter key_begin, std::size_t num_keys)
__global__ void increment_values(Map map_ref, KeyIter key_begin, std::size_t num_keys)
{
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
while (tid < num_keys) {
// If the key exists in the map, find returns an iterator to the specified key. Otherwise it
// returns map.end()
auto found = map_view.find(key_begin[tid]);
if (found != map_view.end()) {
auto found = map_ref.find(key_begin[tid]);
if (found != map_ref.end()) {
// If the key exists, atomically increment the associated value
// The value type of the iterator is pair<cuda::atomic<Key>, cuda::atomic<Value>>
found->second.fetch_add(1, cuda::memory_order_relaxed);
auto ref =
cuda::atomic_ref<typename Map::mapped_type, cuda::thread_scope_device>{found->second};
ref.fetch_add(1, cuda::memory_order_relaxed);
}
tid += gridDim.x * blockDim.x;
}
Expand Down Expand Up @@ -135,11 +135,16 @@ int main(void)
std::size_t const capacity = std::ceil(num_keys / load_factor);

// Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
cuco::static_map<Key, Value> map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
auto map = cuco::experimental::static_map{
capacity,
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
thrust::equal_to<Key>{},
cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{}};

// Get a non-owning, mutable view of the map that allows inserts to pass by value into the kernel
auto device_insert_view = map.get_device_mutable_view();
// Get a non-owning, mutable reference of the map that allows inserts to pass by value into the
// kernel
auto insert_ref = map.ref(cuco::experimental::op::insert);

// Predicate will only insert even keys
auto is_even = [] __device__(auto key) { return (key % 2) == 0; };
Expand All @@ -149,7 +154,7 @@ int main(void)

auto constexpr block_size = 256;
auto const grid_size = (num_keys + block_size - 1) / block_size;
filtered_insert<<<grid_size, block_size>>>(device_insert_view,
filtered_insert<<<grid_size, block_size>>>(insert_ref,
insert_keys.begin(),
insert_values.begin(),
num_keys,
Expand All @@ -158,10 +163,11 @@ int main(void)

std::cout << "Number of keys inserted: " << num_inserted[0] << std::endl;

// Get a non-owning view of the map that allows find operations to pass by value into the kernel
auto device_find_view = map.get_device_view();
// Get a non-owning reference of the map that allows find operations to pass by value into the
// kernel
auto find_ref = map.ref(cuco::experimental::op::find);

increment_values<<<grid_size, block_size>>>(device_find_view, insert_keys.begin(), num_keys);
increment_values<<<grid_size, block_size>>>(find_ref, insert_keys.begin(), num_keys);

// Retrieve contents of all the non-empty slots in the map
thrust::device_vector<Key> contained_keys(num_inserted[0]);
Expand Down
4 changes: 2 additions & 2 deletions examples/static_map/host_bulk_example.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,7 +54,7 @@ int main(void)
std::size_t const capacity = std::ceil(num_keys / load_factor);

// Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
cuco::static_map<Key, Value> map{
auto map = cuco::experimental::static_map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};

// Create a sequence of keys and values {{0,0}, {1,1}, ... {i,i}}
Expand Down
Loading

0 comments on commit d12da95

Please sign in to comment.