diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json
index 97ac85b23..606759c73 100644
--- a/.devcontainer/cuda11.8-gcc11/devcontainer.json
+++ b/.devcontainer/cuda11.8-gcc11/devcontainer.json
@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
- "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda11.8-ubuntu22.04",
+ "image": "rapidsai/devcontainers:24.10-cpp-gcc11-cuda11.8-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
diff --git a/.devcontainer/cuda12.5-gcc12/devcontainer.json b/.devcontainer/cuda12.5-gcc12/devcontainer.json
new file mode 100644
index 000000000..3f562f865
--- /dev/null
+++ b/.devcontainer/cuda12.5-gcc12/devcontainer.json
@@ -0,0 +1,42 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.10-cpp-gcc12-cuda12.5-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": true
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.5-gcc12",
+ "CUCO_CUDA_VERSION": "12.5",
+ "CUCO_HOST_COMPILER": "gcc",
+ "CUCO_HOST_COMPILER_VERSION": "12"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd"
+ ],
+ "settings": {
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}/build/latest"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.5-gcc12"
+}
diff --git a/.devcontainer/cuda12.5-gcc13/devcontainer.json b/.devcontainer/cuda12.5-gcc13/devcontainer.json
new file mode 100644
index 000000000..01bbe927b
--- /dev/null
+++ b/.devcontainer/cuda12.5-gcc13/devcontainer.json
@@ -0,0 +1,42 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": true
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.5-gcc13",
+ "CUCO_CUDA_VERSION": "12.5",
+ "CUCO_HOST_COMPILER": "gcc",
+ "CUCO_HOST_COMPILER_VERSION": "13"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd"
+ ],
+ "settings": {
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}/build/latest"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.5-gcc13"
+}
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 355d73995..01bbe927b 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
- "image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04",
+ "image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
@@ -14,10 +14,10 @@
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
- "DEVCONTAINER_NAME": "cuda12.4-gcc12",
- "CUCO_CUDA_VERSION": "12.4",
+ "DEVCONTAINER_NAME": "cuda12.5-gcc13",
+ "CUCO_CUDA_VERSION": "12.5",
"CUCO_HOST_COMPILER": "gcc",
- "CUCO_HOST_COMPILER_VERSION": "12"
+ "CUCO_HOST_COMPILER_VERSION": "13"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
@@ -38,5 +38,5 @@
}
}
},
- "name": "cuda12.4-gcc12"
+ "name": "cuda12.5-gcc13"
}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b9e97e83..8de18db73 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
- file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.08/RAPIDS.cmake
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.10/RAPIDS.cmake
${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
endif()
include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
diff --git a/README.md b/README.md
index 6b848f8d2..bc50aae02 100644
--- a/README.md
+++ b/README.md
@@ -206,8 +206,8 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/96re4zhjo))
- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/7aKWdGTfx))
-- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfqGxdha))
-- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/zdTnbE1q5))
+- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/7f9KW44P4))
+- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/Ws5c71T4z))
- [Using set as mapping table to handle large keys or indeterministic sentinels](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/mapping_table_example.cu) (see [live example in godbolt](https://godbolt.org/z/KfYo4nMss))
### `static_map`
@@ -234,12 +234,19 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
#### Examples:
- [Host-bulk APIs (TODO)]()
-### `distinct_count_estimator`
+### `hyperloglog`
-`cuco::distinct_count_estimator` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.
+`cuco::hyperloglog` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.
#### Examples:
-- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfofM6qd))
-- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/156T9ox7h))
+- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/G4qdcTezE))
+- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/n88713o4n))
+
+### `bloom_filter`
+
+`cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries.
+
+#### Examples:
+- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/EY7T5v5aE))
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index b4d20083f..c4d4dbc37 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -35,7 +35,7 @@ function(ConfigureBench BENCH_NAME)
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
target_include_directories(${BENCH_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}")
- target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -lineinfo)
+ target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda -lineinfo)
target_link_libraries(${BENCH_NAME} PRIVATE
nvbench::main
pthread
@@ -49,53 +49,59 @@ endfunction(ConfigureBench)
###################################################################################################
# - static_set benchmarks -------------------------------------------------------------------------
ConfigureBench(STATIC_SET_BENCH
- hash_table/static_set/contains_bench.cu
- hash_table/static_set/find_bench.cu
- hash_table/static_set/insert_bench.cu
- hash_table/static_set/retrieve_all_bench.cu
- hash_table/static_set/size_bench.cu
- hash_table/static_set/rehash_bench.cu)
+ static_set/contains_bench.cu
+ static_set/find_bench.cu
+ static_set/insert_bench.cu
+ static_set/retrieve_all_bench.cu
+ static_set/size_bench.cu
+ static_set/rehash_bench.cu)
###################################################################################################
# - static_map benchmarks -------------------------------------------------------------------------
ConfigureBench(STATIC_MAP_BENCH
- hash_table/static_map/insert_bench.cu
- hash_table/static_map/find_bench.cu
- hash_table/static_map/contains_bench.cu
- hash_table/static_map/erase_bench.cu
- hash_table/static_map/insert_or_apply_bench.cu)
+ static_map/insert_bench.cu
+ static_map/find_bench.cu
+ static_map/contains_bench.cu
+ static_map/erase_bench.cu
+ static_map/insert_or_apply_bench.cu)
###################################################################################################
# - static_multiset benchmarks --------------------------------------------------------------------
ConfigureBench(STATIC_MULTISET_BENCH
- hash_table/static_multiset/contains_bench.cu
- hash_table/static_multiset/retrieve_bench.cu
- hash_table/static_multiset/count_bench.cu
- hash_table/static_multiset/find_bench.cu
- hash_table/static_multiset/insert_bench.cu)
+ static_multiset/contains_bench.cu
+ static_multiset/retrieve_bench.cu
+ static_multiset/count_bench.cu
+ static_multiset/find_bench.cu
+ static_multiset/insert_bench.cu)
###################################################################################################
# - static_multimap benchmarks --------------------------------------------------------------------
ConfigureBench(STATIC_MULTIMAP_BENCH
- hash_table/static_multimap/insert_bench.cu
- hash_table/static_multimap/retrieve_bench.cu
- hash_table/static_multimap/query_bench.cu
- hash_table/static_multimap/count_bench.cu)
+ static_multimap/insert_bench.cu
+ static_multimap/retrieve_bench.cu
+ static_multimap/query_bench.cu
+ static_multimap/count_bench.cu)
###################################################################################################
# - dynamic_map benchmarks ------------------------------------------------------------------------
ConfigureBench(DYNAMIC_MAP_BENCH
- hash_table/dynamic_map/insert_bench.cu
- hash_table/dynamic_map/find_bench.cu
- hash_table/dynamic_map/contains_bench.cu
- hash_table/dynamic_map/erase_bench.cu)
+ dynamic_map/insert_bench.cu
+ dynamic_map/find_bench.cu
+ dynamic_map/contains_bench.cu
+ dynamic_map/erase_bench.cu)
###################################################################################################
# - hash function benchmarks ----------------------------------------------------------------------
-ConfigureBench(HASH_BENCH
- hash_bench.cu)
+ConfigureBench(HASH_FUNCTION_BENCH
+ hash_function/hash_function_bench.cu)
###################################################################################################
-# - distinct_count_estimator benchmarks -----------------------------------------------------------
-ConfigureBench(DISTINCT_COUNT_ESTIMATOR_BENCH
- distinct_count_estimator_bench.cu)
+# - hyperloglog benchmarks ------------------------------------------------------------------------
+ConfigureBench(HYPERLOGLOG_BENCH
+ hyperloglog/hyperloglog_bench.cu)
+
+###################################################################################################
+# - bloom_filter benchmarks -----------------------------------------------------------------------
+ConfigureBench(BLOOM_FILTER_BENCH
+ bloom_filter/add_bench.cu
+ bloom_filter/contains_bench.cu)
diff --git a/benchmarks/analysis/notebooks/StaticMultimap.ipynb b/benchmarks/analysis/notebooks/StaticMultimap.ipynb
deleted file mode 100644
index 0269c5690..000000000
--- a/benchmarks/analysis/notebooks/StaticMultimap.ipynb
+++ /dev/null
@@ -1,596 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Preparation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# !pip install pandas\n",
- "# !pip install matplotlib\n",
- "\n",
- "# Import libraries\n",
- "import pandas as pd\n",
- "\n",
- "from Utils import *"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Global Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Specify the data path\n",
- "datafile = '../data/static-multimap-data.csv'\n",
- "\n",
- "output_keys = ['Benchmark', 'Label', 'Distribution', 'MatchingRate', 'Multiplicity', \\\n",
- " 'NumInputs', 'Occupancy', 'GPU Time (sec)', 'Elem/s (elem/sec)']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Import Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read csv file\n",
- "rawdf = pd.read_csv(datafile)\n",
- "\n",
- "# Filter out skipped tests\n",
- "perfdf = rawdf[rawdf[\"Key\"] == rawdf[\"Value\"]].reset_index(drop=True)\n",
- "\n",
- "\n",
- "# Add labels\n",
- "perfdf['Label'] = perfdf[\"Key\"]\n",
- "perfdf.loc[perfdf['Distribution'].notnull(), 'Label'] += \"_\" + perfdf['Distribution']\n",
- "\n",
- "# Trim data frame for visualization\n",
- "perfdf = perfdf[output_keys]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Visualization"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Visualization Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "staic_multimap_insert_uniform_multiplicity\n",
- "staic_multimap_insert_occupancy\n",
- "staic_multimap_count_uniform_multiplicity\n",
- "staic_multimap_count_occupancy\n",
- "staic_multimap_retrieve_uniform_multiplicity\n",
- "staic_multimap_retrieve_occupancy\n",
- "staic_multimap_retrieve_matching_rate\n",
- "staic_multimap_query_uniform_multiplicity\n",
- "staic_multimap_query_occupancy\n",
- "staic_multimap_query_matching_rate\n",
- "staic_multimap_count_matching_rate\n"
- ]
- }
- ],
- "source": [
- "# Get benchmark list\n",
- "unique_bms = perfdf[\"Benchmark\"].unique()\n",
- "for it in unique_bms:\n",
- " print(it)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### `insert` performance by varying key muliplicities\n",
- "
\n",
- "- 100'000'000 insertions
\n",
- "- Fixed matching rate: 0.5
\n",
- "- Fixed occupancy: 0.8
\n",
- "- UNIFORM distribution
\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- "