diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3635336e8..6b03cb98c 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -53,7 +53,8 @@ ConfigureBench(STATIC_SET_BENCH hash_table/static_set/find_bench.cu hash_table/static_set/insert_bench.cu hash_table/static_set/retrieve_all_bench.cu - hash_table/static_set/size_bench.cu) + hash_table/static_set/size_bench.cu + hash_table/static_set/rehash_bench.cu) ################################################################################################### # - static_map benchmarks ------------------------------------------------------------------------- diff --git a/benchmarks/hash_table/static_set/rehash_bench.cu b/benchmarks/hash_table/static_set/rehash_bench.cu new file mode 100644 index 000000000..167f51cec --- /dev/null +++ b/benchmarks/hash_table/static_set/rehash_bench.cu @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +#include + +#include + +/** + * @brief A benchmark evaluating `cuco::static_set::rehash` performance + */ +template +void static_set_rehash(nvbench::state& state, nvbench::type_list) +{ + auto const num_keys = state.get_int64_or_default("NumInputs", cuco::benchmark::defaults::N); + auto const occupancy = + state.get_float64_or_default("Occupancy", cuco::benchmark::defaults::OCCUPANCY); + + std::size_t const size = num_keys / occupancy; + + thrust::device_vector keys(num_keys); + + cuco::utility::key_generator gen; + gen.generate(cuco::benchmark::dist_from_state(state), keys.begin(), keys.end()); + + state.add_element_count(num_keys); + + cuco::experimental::static_set set{size, cuco::empty_key{-1}}; + + set.insert(keys.begin(), keys.end()); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { set.rehash({launch.get_stream()}); }); +} + +NVBENCH_BENCH_TYPES(static_set_rehash, + NVBENCH_TYPE_AXES(cuco::benchmark::defaults::KEY_TYPE_RANGE, + nvbench::type_list)) + .set_name("static_set_rehash_unique_occupancy") + .set_type_axes_names({"Key", "Distribution"}) + .set_max_noise(cuco::benchmark::defaults::MAX_NOISE) + .add_float64_axis("Occupancy", cuco::benchmark::defaults::OCCUPANCY_RANGE); diff --git a/include/cuco/detail/common_kernels.cuh b/include/cuco/detail/common_kernels.cuh index 3186b8eee..cd13ad8ff 100644 --- a/include/cuco/detail/common_kernels.cuh +++ b/include/cuco/detail/common_kernels.cuh @@ -288,7 +288,6 @@ __global__ void rehash(StorageRef storage_ref, ContainerRef container_ref, Predi if (idx < n) { auto const window = storage_ref[idx]; - // #pragma unroll window.size() for (auto const& slot : window) { if (is_filled(slot)) { buffer[atomicAdd_block(&buffer_size, 1)] = slot; } }