Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/dev' into heterogeneous-insert
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeepyjack committed Sep 29, 2023
2 parents 6cbabd2 + ee9c48a commit 01ae730
Show file tree
Hide file tree
Showing 8 changed files with 387 additions and 10 deletions.
59 changes: 57 additions & 2 deletions include/cuco/detail/open_addressing_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

#include <cuda/atomic>

#include <cmath>

namespace cuco {
namespace experimental {
namespace detail {
Expand Down Expand Up @@ -120,8 +122,8 @@ class open_addressing_impl {
* @param stream CUDA stream used to initialize the data structure
*/
constexpr open_addressing_impl(Extent capacity,
key_type empty_key_sentinel,
value_type empty_slot_sentinel,
Key empty_key_sentinel,
Value empty_slot_sentinel,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
Expand All @@ -135,6 +137,59 @@ class open_addressing_impl {
this->clear_async(stream);
}

/**
 * @brief Constructs a statically-sized open addressing data structure with the number of elements
 * to insert `n`, the desired load factor, etc.
 *
 * @note This constructor helps users create a data structure based on the number of elements to
 * insert and the desired load factor without manually computing the desired capacity. The actual
 * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by
 * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG
 * size, and the window size and is computed via the `make_window_extent` factory.
 * @note Insert operations will not automatically grow the container.
 * @note Attempting to insert more unique keys than the capacity of the container results in
 * undefined behavior.
 * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
 * this sentinel value.
 * @note This constructor doesn't synchronize the given stream.
 * @note This overload will convert compile-time extents to runtime constants which might lead to
 * performance regressions.
 * @note The load factor is validated before the requested capacity is computed, so an invalid
 * value never reaches the division, the integer conversion, or the storage constructor.
 *
 * @throw If the desired load factor is not larger than zero
 * @throw If the desired load factor is not smaller than one
 *
 * @param n The number of elements to insert
 * @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
 * load factor. Must lie strictly between 0 and 1
 * @param empty_key_sentinel The reserved key value for empty slots
 * @param empty_slot_sentinel The reserved slot value for empty slots
 * @param pred Key equality binary predicate
 * @param probing_scheme Probing scheme
 * @param alloc Allocator used for allocating device storage
 * @param stream CUDA stream used to initialize the data structure
 */
constexpr open_addressing_impl(Extent n,
                               double desired_load_factor,
                               Key empty_key_sentinel,
                               Value empty_slot_sentinel,
                               KeyEqual const& pred,
                               ProbingScheme const& probing_scheme,
                               Allocator const& alloc,
                               cuda_stream_ref stream)
  : empty_key_sentinel_{empty_key_sentinel},
    empty_slot_sentinel_{empty_slot_sentinel},
    predicate_{pred},
    probing_scheme_{probing_scheme},
    storage_{make_window_extent<open_addressing_impl>([&]() {
               // Validate first: dividing by a zero/negative/NaN load factor and casting the
               // result to `size_type` would otherwise happen before any check could fire.
               CUCO_EXPECTS(desired_load_factor > 0., "Desired occupancy must be larger than zero");
               CUCO_EXPECTS(desired_load_factor < 1., "Desired occupancy must be smaller than one");
               return static_cast<size_type>(
                 std::ceil(static_cast<double>(n) / desired_load_factor));
             }()),
             alloc}
{
  this->clear_async(stream);
}

/**
* @brief Erases all elements from the container. After this call, `size()` returns zero.
* Invalidates any references, pointers, or iterators referring to contained elements.
Expand Down
29 changes: 29 additions & 0 deletions include/cuco/detail/static_map/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,35 @@ constexpr static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
{
}

// Load-factor constructor: forwards `n` and `desired_load_factor` to a heap-allocated `impl_type`
// (created via `std::make_unique`) together with the sentinels, predicate, probing scheme,
// allocator and stream. The slot sentinel handed to the implementation is a `cuco::pair` built
// from the empty key sentinel and the empty value sentinel. The empty value sentinel is also
// stored in `empty_value_sentinel_`. The constructor body itself is empty.
template <class Key,
class T,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
constexpr static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
static_map(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
empty_value<T> empty_value_sentinel,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
cuda_stream_ref stream)
: impl_{std::make_unique<impl_type>(n,
desired_load_factor,
empty_key_sentinel,
cuco::pair{empty_key_sentinel, empty_value_sentinel},
pred,
probing_scheme,
alloc,
stream)},
empty_value_sentinel_{empty_value_sentinel}
{
}

template <class Key,
class T,
class Extent,
Expand Down
26 changes: 26 additions & 0 deletions include/cuco/detail/static_set/static_set.inl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@ constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Sto
{
}

// Load-factor constructor: forwards `n` and `desired_load_factor` to a heap-allocated `impl_type`
// (created via `std::make_unique`) together with the predicate, probing scheme, allocator and
// stream. `empty_key_sentinel` is passed twice on purpose: once in the key-sentinel position and
// once in the slot-sentinel position of the implementation's constructor. The constructor body
// itself is empty.
template <class Key,
class Extent,
cuda::thread_scope Scope,
class KeyEqual,
class ProbingScheme,
class Allocator,
class Storage>
constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::static_set(
Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
KeyEqual const& pred,
ProbingScheme const& probing_scheme,
Allocator const& alloc,
cuda_stream_ref stream)
: impl_{std::make_unique<impl_type>(n,
desired_load_factor,
empty_key_sentinel,
empty_key_sentinel,
pred,
probing_scheme,
alloc,
stream)}
{
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
Expand Down
46 changes: 42 additions & 4 deletions include/cuco/static_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ namespace experimental {
* @tparam Allocator Type of allocator used for device storage
* @tparam Storage Slot window storage type
*/

template <class Key,
class T,
class Extent = cuco::experimental::extent<std::size_t>,
Expand Down Expand Up @@ -156,7 +155,7 @@ class static_map {

/**
* @brief Constructs a statically-sized map with the specified initial capacity, sentinel values
* and CUDA stream.
* and CUDA stream
*
* The actual map capacity depends on the given `capacity`, the probing scheme, CG size, and the
* window size and it is computed via the `make_window_extent` factory. Insert operations will not
Expand All @@ -165,8 +164,7 @@ class static_map {
*
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the
* stream before the object is first used.
* @note This constructor doesn't synchronize the given stream.
*
* @param capacity The requested lower-bound map size
* @param empty_key_sentinel The reserved key value for empty slots
Expand All @@ -184,6 +182,46 @@ class static_map {
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Constructs a statically-sized map with the number of elements to insert `n`, the desired
* load factor, etc
*
* @note This constructor helps users create a map based on the number of elements to insert and
* the desired load factor without manually computing the desired capacity. The actual map
* capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by
* multiple factors including the given `n`, the desired load factor, the probing scheme, the CG
* size, and the window size and is computed via the `make_window_extent` factory.
* @note Insert operations will not automatically grow the container.
* @note Attempting to insert more unique keys than the capacity of the container results in
* undefined behavior.
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note This constructor doesn't synchronize the given stream.
* @note This overload will convert compile-time extents to runtime constants which might lead to
* performance regressions.
*
* @throw If the desired load factor is not larger than zero
* @throw If the desired load factor is not smaller than one
*
* @param n The number of elements to insert
* @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
* load factor. Must lie strictly between 0 and 1
* @param empty_key_sentinel The reserved key value for empty slots
* @param empty_value_sentinel The reserved mapped value for empty slots
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the map
*/
constexpr static_map(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
empty_value<T> empty_value_sentinel,
KeyEqual const& pred = {},
ProbingScheme const& probing_scheme = {},
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Erases all elements from the container. After this call, `size()` returns zero.
* Invalidates any references, pointers, or iterators referring to contained elements.
Expand Down
44 changes: 40 additions & 4 deletions include/cuco/static_set.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ namespace experimental {
* @tparam Allocator Type of allocator used for device storage
* @tparam Storage Slot window storage type
*/

template <class Key,
class Extent = cuco::experimental::extent<std::size_t>,
cuda::thread_scope Scope = cuda::thread_scope_device,
Expand Down Expand Up @@ -131,7 +130,7 @@ class static_set {

/**
* @brief Constructs a statically-sized set with the specified initial capacity, sentinel values
* and CUDA stream.
* and CUDA stream
*
* The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the
* window size and it is computed via the `make_window_extent` factory. Insert operations will not
Expand All @@ -140,8 +139,7 @@ class static_set {
*
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the
* stream before the object is first used.
* @note This constructor doesn't synchronize the given stream.
*
* @param capacity The requested lower-bound set size
* @param empty_key_sentinel The reserved key value for empty slots
Expand All @@ -157,6 +155,44 @@ class static_set {
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Constructs a statically-sized set with the number of elements to insert `n`, the desired
* load factor, etc
*
* @note This constructor helps users create a set based on the number of elements to insert and
* the desired load factor without manually computing the desired capacity. The actual set
* capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by
* multiple factors including the given `n`, the desired load factor, the probing scheme, the CG
* size, and the window size and is computed via the `make_window_extent` factory.
* @note Insert operations will not automatically grow the container.
* @note Attempting to insert more unique keys than the capacity of the container results in
* undefined behavior.
* @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert
* this sentinel value.
* @note This constructor doesn't synchronize the given stream.
* @note This overload will convert compile-time extents to runtime constants which might lead to
* performance regressions.
*
* @throw If the desired load factor is not larger than zero
* @throw If the desired load factor is not smaller than one
*
* @param n The number of elements to insert
* @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50%
* load factor. Must lie strictly between 0 and 1
* @param empty_key_sentinel The reserved key value for empty slots
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the set
*/
constexpr static_set(Extent n,
double desired_load_factor,
empty_key<Key> empty_key_sentinel,
KeyEqual const& pred = {},
ProbingScheme const& probing_scheme = {},
Allocator const& alloc = {},
cuda_stream_ref stream = {});

/**
* @brief Erases all elements from the container. After this call, `size()` returns zero.
* Invalidates any references, pointers, or iterators referring to contained elements.
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ ConfigureTest(STATIC_SET_TEST
###################################################################################################
# - static_map tests ------------------------------------------------------------------------------
ConfigureTest(STATIC_MAP_TEST
static_map/capacity_test.cu
static_map/custom_type_test.cu
static_map/duplicate_keys_test.cu
static_map/erase_test.cu
Expand Down
Loading

0 comments on commit 01ae730

Please sign in to comment.