diff --git a/cpp/src/groupby/hash/compute_groupby.cu b/cpp/src/groupby/hash/compute_groupby.cu
index e1dbf2a3d9e..9648d942513 100644
--- a/cpp/src/groupby/hash/compute_groupby.cu
+++ b/cpp/src/groupby/hash/compute_groupby.cu
@@ -61,7 +61,7 @@ std::unique_ptr<table> compute_groupby(table_view const& keys,
d_row_equal,
probing_scheme_t{d_row_hash},
cuco::thread_scope_device,
- cuco::storage<GROUPBY_WINDOW_SIZE>{},
+ cuco::storage<GROUPBY_BUCKET_SIZE>{},
cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream},
stream.value()};
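The `cuco::storage<N>` argument above is the storage policy being renamed throughout this patch: `N` slots per bucket (formerly per window). For reference, a minimal, self-contained sketch of constructing a `cuco::static_set` with the same argument shape as `compute_groupby` uses — the key type, capacity, and sentinel here are illustrative, not taken from the patch:

```cpp
#include <cuco/static_set.cuh>

#include <thrust/functional.h>

#include <cstddef>

int main()
{
  // Capacity, empty-slot sentinel, comparator, probing scheme, scope, storage.
  // cuco::storage<1> requests one slot per bucket, matching
  // GROUPBY_BUCKET_SIZE = 1 (formerly GROUPBY_WINDOW_SIZE) in helpers.cuh.
  auto set = cuco::static_set{
    cuco::extent<std::size_t>{1'000},
    cuco::empty_key{-1},
    thrust::equal_to<int>{},
    cuco::linear_probing<1, cuco::default_hash_function<int>>{},
    cuco::thread_scope_device,
    cuco::storage<1>{}};
  return 0;
}
```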
diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cuh b/cpp/src/groupby/hash/compute_mapping_indices.cuh
index d353830780f..f86a93109be 100644
--- a/cpp/src/groupby/hash/compute_mapping_indices.cuh
+++ b/cpp/src/groupby/hash/compute_mapping_indices.cuh
@@ -106,15 +106,15 @@ CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows,
__shared__ cudf::size_type shared_set_indices[GROUPBY_SHM_MAX_ELEMENTS];
// Shared set initialization
- __shared__ cuco::window<cudf::size_type, GROUPBY_WINDOW_SIZE> windows[window_extent.value()];
+ __shared__ cuco::bucket<cudf::size_type, GROUPBY_BUCKET_SIZE> buckets[bucket_extent.value()];
auto raw_set = cuco::static_set_ref{
cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL},
global_set.key_eq(),
probing_scheme_t{global_set.hash_function()},
cuco::thread_scope_block,
- cuco::aow_storage_ref<cudf::size_type, GROUPBY_WINDOW_SIZE, decltype(window_extent)>{
- window_extent, windows}};
+ cuco::bucket_storage_ref<cudf::size_type, GROUPBY_BUCKET_SIZE, decltype(bucket_extent)>{
+ bucket_extent, buckets}};
auto shared_set = raw_set.rebind_operators(cuco::insert_and_find);
auto const block = cooperative_groups::this_thread_block();
diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh
index f950e03e0fb..92925e11bac 100644
--- a/cpp/src/groupby/hash/helpers.cuh
+++ b/cpp/src/groupby/hash/helpers.cuh
@@ -27,7 +27,7 @@ namespace cudf::groupby::detail::hash {
CUDF_HOST_DEVICE auto constexpr GROUPBY_CG_SIZE = 1;
/// Number of slots per thread
-CUDF_HOST_DEVICE auto constexpr GROUPBY_WINDOW_SIZE = 1;
+CUDF_HOST_DEVICE auto constexpr GROUPBY_BUCKET_SIZE = 1;
/// Thread block size
CUDF_HOST_DEVICE auto constexpr GROUPBY_BLOCK_SIZE = 128;
@@ -48,9 +48,9 @@ using shmem_extent_t =
cuco::extent<cudf::size_type,
             static_cast<cudf::size_type>(static_cast<double>(GROUPBY_SHM_MAX_ELEMENTS) * 1.43)>;
-/// Number of windows needed by each shared memory hash set
-CUDF_HOST_DEVICE auto constexpr window_extent =
- cuco::make_window_extent<GROUPBY_CG_SIZE, GROUPBY_WINDOW_SIZE>(shmem_extent_t{});
+/// Number of buckets needed by each shared memory hash set
+CUDF_HOST_DEVICE auto constexpr bucket_extent =
+ cuco::make_bucket_extent<GROUPBY_CG_SIZE, GROUPBY_BUCKET_SIZE>(shmem_extent_t{});
using row_hash_t =
  cudf::experimental::row::hash::device_row_hasher<cudf::hashing::detail::default_hash,
                                                   cudf::nullate::DYNAMIC>;
- cuco::storage<GROUPBY_WINDOW_SIZE>>;
+ cuco::storage<GROUPBY_BUCKET_SIZE>>;
using nullable_global_set_t = cuco::static_set<cudf::size_type,
@@ -83,7 +83,7 @@ using nullable_global_set_t = cuco::static_set<cudf::size_type,
- cuco::storage<GROUPBY_WINDOW_SIZE>>;
+ cuco::storage<GROUPBY_BUCKET_SIZE>>;
template <typename Op>
using hash_set_ref_t = cuco::static_set_ref<
@@ -91,7 +91,7 @@ using hash_set_ref_t = cuco::static_set_ref<
cuda::thread_scope_device,
row_comparator_t,
probing_scheme_t,
- cuco::aow_storage_ref<cudf::size_type, GROUPBY_WINDOW_SIZE, cuco::window_extent<int64_t>>,
+ cuco::bucket_storage_ref<cudf::size_type, GROUPBY_BUCKET_SIZE, cuco::bucket_extent<int64_t>>,
Op>;
template <typename Op>
@@ -100,6 +100,6 @@ using nullable_hash_set_ref_t = cuco::static_set_ref<
cuda::thread_scope_device,
nullable_row_comparator_t,
probing_scheme_t,
- cuco::aow_storage_ref<cudf::size_type, GROUPBY_WINDOW_SIZE, cuco::window_extent<int64_t>>,
+ cuco::bucket_storage_ref<cudf::size_type, GROUPBY_BUCKET_SIZE, cuco::bucket_extent<int64_t>>,
Op>;
} // namespace cudf::groupby::detail::hash
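The `1.43` in `shmem_extent_t` (and again in the ORC/Parquet headers below) is simply the reciprocal of the targeted 70% occupancy, and `make_bucket_extent` (renamed from `make_window_extent`) then rounds that request up to a valid bucket count for the probing scheme. A compile-time sketch; the 256-element capacity is a hypothetical stand-in for `GROUPBY_SHM_MAX_ELEMENTS`, whose definition lies outside the hunks shown:

```cpp
#include <cuco/extent.cuh>

// 1/0.7 ≈ 1.4286, conventionally rounded to 1.43.
constexpr double target_occupancy = 0.7;
static_assert(1.0 / target_occupancy > 1.42 && 1.0 / target_occupancy < 1.43);

// Hypothetical stand-in for GROUPBY_SHM_MAX_ELEMENTS.
constexpr int shm_max_elements = 256;

// Static extent over-allocated by the occupancy factor, as in shmem_extent_t.
using shmem_extent_t =
  cuco::extent<int, static_cast<int>(static_cast<double>(shm_max_elements) * 1.43)>;

// CG size 1 and bucket size 1, matching GROUPBY_CG_SIZE / GROUPBY_BUCKET_SIZE.
constexpr auto bucket_extent = cuco::make_bucket_extent<1, 1>(shmem_extent_t{});

// The rounded-up bucket count can never be smaller than what was asked for.
static_assert(bucket_extent.value() >= shm_max_elements);
```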
diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu
index 0cb5c382631..7facc6497ed 100644
--- a/cpp/src/io/orc/dict_enc.cu
+++ b/cpp/src/io/orc/dict_enc.cu
@@ -180,9 +180,9 @@ CUDF_KERNEL void __launch_bounds__(block_size)
for (size_type i = 0; i < dict.map_slots.size(); i += block_size) {
if (t + i < dict.map_slots.size()) {
- auto window = dict.map_slots.begin() + t + i;
- // Collect all slots from each window.
- for (auto& slot : *window) {
+ auto bucket = dict.map_slots.begin() + t + i;
+ // Collect all slots from each bucket.
+ for (auto& slot : *bucket) {
auto const key = slot.first;
if (key != KEY_SENTINEL) {
auto loc = counter.fetch_add(1, memory_order_relaxed);
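The rewritten loop in `dict_enc.cu` is a compaction idiom that recurs in `chunk_dict.cu` below: threads walk whole buckets, and every non-sentinel key claims an output position through a relaxed atomic counter. A standalone sketch of the same idiom for single-slot buckets (the names and the grid-stride layout are illustrative; the patch's kernels instead stride per block over one dictionary's sub-range):

```cpp
#include <cuco/pair.cuh>

#include <cuda/atomic>

#include <cstddef>

using slot_type = cuco::pair<int, int>;  // (key, payload), as in the IO headers below
constexpr int KEY_SENTINEL = -1;

// Grid-stride compaction: append every live key in `slots` to `out`,
// claiming positions via a relaxed fetch_add on a device-scoped counter.
__global__ void collect_keys(slot_type const* slots,
                             std::size_t num_slots,
                             int* out,
                             cuda::atomic<int, cuda::thread_scope_device>* counter)
{
  auto idx          = std::size_t{blockIdx.x} * blockDim.x + threadIdx.x;
  auto const stride = std::size_t{gridDim.x} * blockDim.x;
  for (; idx < num_slots; idx += stride) {
    auto const key = slots[idx].first;
    if (key != KEY_SENTINEL) {
      auto const loc = counter->fetch_add(1, cuda::std::memory_order_relaxed);
      out[loc] = key;
    }
  }
}
```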
diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp
index 0949fafe9a4..654ee1e012c 100644
--- a/cpp/src/io/orc/orc_gpu.hpp
+++ b/cpp/src/io/orc/orc_gpu.hpp
@@ -47,16 +47,16 @@ using slot_type = cuco::pair<key_type, mapped_type>;
auto constexpr map_cg_size =
1; ///< A CUDA Cooperative Group of 1 thread (set for best performance) to handle each subset.
///< Note: Adjust insert and find loops to use `cg::tile<map_cg_size>` if increasing this.
-auto constexpr window_size =
+auto constexpr bucket_size =
1; ///< Number of concurrent slots (set for best performance) handled by each thread.
auto constexpr occupancy_factor = 1.43f; ///< cuCollections suggests using a hash map of size
///< N * (1/0.7) = 1.43 to target a 70% occupancy factor.
-using storage_type = cuco::aow_storage<slot_type,
-                                       window_size,
-                                       cuco::extent<std::size_t>,
-                                       cudf::detail::cuco_allocator<char>>;
+using storage_type = cuco::bucket_storage<slot_type,
+                                          bucket_size,
+                                          cuco::extent<std::size_t>,
+                                          cudf::detail::cuco_allocator<char>>;
using storage_ref_type = typename storage_type::ref_type;
-using window_type = typename storage_type::window_type;
+using bucket_type = typename storage_type::bucket_type;
using slot_type = cuco::pair<key_type, mapped_type>;
auto constexpr KEY_SENTINEL = size_type{-1};
@@ -193,7 +193,7 @@ struct StripeStream {
*/
struct stripe_dictionary {
// input
- device_span<window_type> map_slots; // hash map (windows) storage
+ device_span<bucket_type> map_slots; // hash map (buckets) storage
uint32_t column_idx = 0; // column index
size_type start_row = 0; // first row in the stripe
size_type start_rowgroup = 0; // first rowgroup in the stripe
diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index b85ebf2fa1a..b5f9b894c46 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -210,7 +210,7 @@ struct map_find_fn {
template <int block_size>
CUDF_KERNEL void __launch_bounds__(block_size)
- populate_chunk_hash_maps_kernel(device_span<window_type> const map_storage,
+ populate_chunk_hash_maps_kernel(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags)
{
auto const col_idx = blockIdx.y;
@@ -239,7 +239,7 @@ CUDF_KERNEL void __launch_bounds__(block_size)
template <int block_size>
CUDF_KERNEL void __launch_bounds__(block_size)
- collect_map_entries_kernel(device_span<window_type> const map_storage,
+ collect_map_entries_kernel(device_span<bucket_type> const map_storage,
device_span<EncColumnChunk> chunks)
{
auto& chunk = chunks[blockIdx.x];
@@ -251,11 +251,11 @@ CUDF_KERNEL void __launch_bounds__(block_size)
if (t == 0) { new (&counter) cuda::atomic<size_type, SCOPE>{0}; }
__syncthreads();
- // Iterate over all windows in the map.
+ // Iterate over all buckets in the map.
for (; t < chunk.dict_map_size; t += block_size) {
- auto window = map_storage.data() + chunk.dict_map_offset + t;
- // Collect all slots from each window.
- for (auto& slot : *window) {
+ auto bucket = map_storage.data() + chunk.dict_map_offset + t;
+ // Collect all slots from each bucket.
+ for (auto& slot : *bucket) {
auto const key = slot.first;
if (key != KEY_SENTINEL) {
auto const loc = counter.fetch_add(1, memory_order_relaxed);
@@ -272,7 +272,7 @@ CUDF_KERNEL void __launch_bounds__(block_size)
template <int block_size>
CUDF_KERNEL void __launch_bounds__(block_size)
- get_dictionary_indices_kernel(device_span<window_type> const map_storage,
+ get_dictionary_indices_kernel(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags)
{
auto const col_idx = blockIdx.y;
@@ -302,7 +302,7 @@ CUDF_KERNEL void __launch_bounds__(block_size)
s_ck_start_val_idx);
}
-void populate_chunk_hash_maps(device_span<window_type> const map_storage,
+void populate_chunk_hash_maps(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream)
{
@@ -311,7 +311,7 @@ void populate_chunk_hash_maps(device_span<window_type> const map_storage,
<<<dim_grid, DEFAULT_BLOCK_SIZE, 0, stream.value()>>>(map_storage, frags);
}
-void collect_map_entries(device_span<window_type> const map_storage,
+void collect_map_entries(device_span<bucket_type> const map_storage,
device_span<EncColumnChunk> chunks,
rmm::cuda_stream_view stream)
{
@@ -320,7 +320,7 @@ void collect_map_entries(device_span<window_type> const map_storage,
<<<chunks.size(), DEFAULT_BLOCK_SIZE, 0, stream.value()>>>(map_storage, chunks);
}
-void get_dictionary_indices(device_span<window_type> const map_storage,
+void get_dictionary_indices(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream)
{
diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh
index 7c09764da2d..800875f7448 100644
--- a/cpp/src/io/parquet/parquet_gpu.cuh
+++ b/cpp/src/io/parquet/parquet_gpu.cuh
@@ -34,7 +34,7 @@ using slot_type = cuco::pair<key_type, mapped_type>;
auto constexpr map_cg_size =
1; ///< A CUDA Cooperative Group of 1 thread (set for best performance) to handle each subset.
///< Note: Adjust insert and find loops to use `cg::tile<map_cg_size>` if increasing this.
-auto constexpr window_size =
+auto constexpr bucket_size =
1; ///< Number of concurrent slots (set for best performance) handled by each thread.
auto constexpr occupancy_factor = 1.43f; ///< cuCollections suggests using a hash map of size
///< N * (1/0.7) = 1.43 to target a 70% occupancy factor.
@@ -43,12 +43,12 @@ auto constexpr KEY_SENTINEL = key_type{-1};
auto constexpr VALUE_SENTINEL = mapped_type{-1};
auto constexpr SCOPE = cuda::thread_scope_block;
-using storage_type = cuco::aow_storage<slot_type,
-                                       window_size,
-                                       cuco::extent<std::size_t>,
-                                       cudf::detail::cuco_allocator<char>>;
+using storage_type = cuco::bucket_storage<slot_type,
+                                          bucket_size,
+                                          cuco::extent<std::size_t>,
+                                          cudf::detail::cuco_allocator<char>>;
using storage_ref_type = typename storage_type::ref_type;
-using window_type = typename storage_type::window_type;
+using bucket_type = typename storage_type::bucket_type;
/**
* @brief Return the byte length of parquet dtypes that are physically represented by INT32
@@ -100,7 +100,7 @@ inline size_type __device__ row_to_value_idx(size_type idx,
* @param frags Column fragments
* @param stream CUDA stream to use
*/
-void populate_chunk_hash_maps(device_span<window_type> const map_storage,
+void populate_chunk_hash_maps(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream);
@@ -111,7 +111,7 @@ void populate_chunk_hash_maps(device_span<window_type> const map_storage,
* @param chunks Flat span of chunks to compact hash maps for
* @param stream CUDA stream to use
*/
-void collect_map_entries(device_span<window_type> const map_storage,
+void collect_map_entries(device_span<bucket_type> const map_storage,
device_span<EncColumnChunk> chunks,
rmm::cuda_stream_view stream);
@@ -128,7 +128,7 @@ void collect_map_entries(device_span<window_type> const map_storage,
* @param frags Column fragments
* @param stream CUDA stream to use
*/
-void get_dictionary_indices(device_span<window_type> const map_storage,
+void get_dictionary_indices(device_span<bucket_type> const map_storage,
cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream);
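Putting the renamed pieces of `parquet_gpu.cuh` together, a type-level sketch (assuming the post-rename header is `<cuco/bucket_storage.cuh>`; cuco's own allocator is substituted here for `cudf::detail::cuco_allocator`):

```cpp
#include <cuco/bucket_storage.cuh>
#include <cuco/extent.cuh>
#include <cuco/pair.cuh>
#include <cuco/utility/allocator.cuh>

#include <cstddef>

using slot_type = cuco::pair<int, int>;

// Same shape as storage_type above: one slot per bucket, runtime extent,
// device memory obtained through the allocator type parameter.
using storage_type = cuco::bucket_storage<slot_type,
                                          1,  // bucket_size
                                          cuco::extent<std::size_t>,
                                          cuco::cuda_allocator<char>>;

// bucket_type is the renamed window_type; with bucket_size == 1 a bucket
// occupies exactly one slot, so spans of buckets and slots line up.
static_assert(sizeof(storage_type::bucket_type) == sizeof(slot_type));
```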
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index 188e6a8c0d8..6db92462498 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -1302,7 +1302,7 @@ build_chunk_dictionaries(hostdevice_2dvector<EncColumnChunk>& chunks,
} else {
chunk.use_dictionary = true;
chunk.dict_map_size =
- static_cast<size_type>(cuco::make_window_extent<map_cg_size, window_size>(
+ static_cast<size_type>(cuco::make_bucket_extent<map_cg_size, bucket_size>(
static_cast<size_type>(occupancy_factor * chunk.num_values)));
chunk.dict_map_offset = total_map_storage_size;
total_map_storage_size += chunk.dict_map_size;
@@ -1317,7 +1317,7 @@ build_chunk_dictionaries(hostdevice_2dvector<EncColumnChunk>& chunks,
total_map_storage_size,
cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}};
// Create a span of non-const map_storage as map_storage_ref takes in a non-const pointer.
- device_span<window_type> const map_storage_data{map_storage.data(), total_map_storage_size};
+ device_span<bucket_type> const map_storage_data{map_storage.data(), total_map_storage_size};
// Synchronize
chunks.host_to_device_async(stream);
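Finally, the `dict_map_size` computation in `writer_impl.cu`, in isolation: scale the chunk's value count by the occupancy factor, then let `make_bucket_extent` round the request up to a valid size. A minimal sketch; the value count is hypothetical, and CG size and bucket size are 1 as declared in `parquet_gpu.cuh`:

```cpp
#include <cuco/extent.cuh>

#include <cstdint>
#include <cstdio>

int main()
{
  constexpr float occupancy_factor = 1.43f;   // 1/0.7, as in parquet_gpu.cuh
  std::int32_t const num_values    = 10'000;  // hypothetical chunk value count

  // Round num_values * 1.43 up to a valid bucket count for CG size 1 and
  // one slot per bucket; the resulting extent converts back by explicit cast.
  auto const dict_map_size = static_cast<std::int32_t>(
    cuco::make_bucket_extent<1, 1>(static_cast<std::size_t>(occupancy_factor * num_values)));

  std::printf("dictionary map slots: %d\n", dict_map_size);
  return 0;
}
```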