Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/dev' into retrieve
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeepyjack committed Oct 17, 2024
2 parents ae8e396 + ae3ac6e commit f841fce
Show file tree
Hide file tree
Showing 139 changed files with 7,294 additions and 2,559 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-gcc11/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda11.8-ubuntu22.04",
"image": "rapidsai/devcontainers:24.10-cpp-gcc11-cuda11.8-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
Expand Down
42 changes: 42 additions & 0 deletions .devcontainer/cuda12.5-gcc12/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.10-cpp-gcc12-cuda12.5-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.5-gcc12",
"CUCO_CUDA_VERSION": "12.5",
"CUCO_HOST_COMPILER": "gcc",
"CUCO_HOST_COMPILER_VERSION": "12"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd"
],
"settings": {
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}/build/latest"
]
}
}
},
"name": "cuda12.5-gcc12"
}
42 changes: 42 additions & 0 deletions .devcontainer/cuda12.5-gcc13/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.5-gcc13",
"CUCO_CUDA_VERSION": "12.5",
"CUCO_HOST_COMPILER": "gcc",
"CUCO_HOST_COMPILER_VERSION": "13"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd"
],
"settings": {
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}/build/latest"
]
}
}
},
"name": "cuda12.5-gcc13"
}
10 changes: 5 additions & 5 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04",
"image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
"hostRequirements": {
"gpu": true
},
Expand All @@ -14,10 +14,10 @@
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.4-gcc12",
"CUCO_CUDA_VERSION": "12.4",
"DEVCONTAINER_NAME": "cuda12.5-gcc13",
"CUCO_CUDA_VERSION": "12.5",
"CUCO_HOST_COMPILER": "gcc",
"CUCO_HOST_COMPILER_VERSION": "12"
"CUCO_HOST_COMPILER_VERSION": "13"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
Expand All @@ -38,5 +38,5 @@
}
}
},
"name": "cuda12.4-gcc12"
"name": "cuda12.5-gcc13"
}
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)

if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.08/RAPIDS.cmake
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.10/RAPIDS.cmake
${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
endif()
include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
Expand Down
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/96re4zhjo))
- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/7aKWdGTfx))
- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfqGxdha))
- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/zdTnbE1q5))
- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/7f9KW44P4))
- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/Ws5c71T4z))
- [Using set as mapping table to handle large keys or non-deterministic sentinels](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/mapping_table_example.cu) (see [live example in godbolt](https://godbolt.org/z/KfYo4nMss))

### `static_map`
Expand All @@ -234,12 +234,19 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
#### Examples:
- [Host-bulk APIs (TODO)]()

### `distinct_count_estimator`
### `hyperloglog`

`cuco::distinct_count_estimator` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.
`cuco::hyperloglog` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.

#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfofM6qd))
- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/156T9ox7h))
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/G4qdcTezE))
- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/n88713o4n))

### `bloom_filter`

`cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries.

#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/EY7T5v5aE))


66 changes: 36 additions & 30 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function(ConfigureBench BENCH_NAME)
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
target_include_directories(${BENCH_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}")
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -lineinfo)
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda -lineinfo)
target_link_libraries(${BENCH_NAME} PRIVATE
nvbench::main
pthread
Expand All @@ -49,53 +49,59 @@ endfunction(ConfigureBench)
###################################################################################################
# - static_set benchmarks -------------------------------------------------------------------------
ConfigureBench(STATIC_SET_BENCH
hash_table/static_set/contains_bench.cu
hash_table/static_set/find_bench.cu
hash_table/static_set/insert_bench.cu
hash_table/static_set/retrieve_all_bench.cu
hash_table/static_set/size_bench.cu
hash_table/static_set/rehash_bench.cu)
static_set/contains_bench.cu
static_set/find_bench.cu
static_set/insert_bench.cu
static_set/retrieve_all_bench.cu
static_set/size_bench.cu
static_set/rehash_bench.cu)

###################################################################################################
# - static_map benchmarks -------------------------------------------------------------------------
ConfigureBench(STATIC_MAP_BENCH
hash_table/static_map/insert_bench.cu
hash_table/static_map/find_bench.cu
hash_table/static_map/contains_bench.cu
hash_table/static_map/erase_bench.cu
hash_table/static_map/insert_or_apply_bench.cu)
static_map/insert_bench.cu
static_map/find_bench.cu
static_map/contains_bench.cu
static_map/erase_bench.cu
static_map/insert_or_apply_bench.cu)

###################################################################################################
# - static_multiset benchmarks --------------------------------------------------------------------
ConfigureBench(STATIC_MULTISET_BENCH
hash_table/static_multiset/contains_bench.cu
hash_table/static_multiset/retrieve_bench.cu
hash_table/static_multiset/count_bench.cu
hash_table/static_multiset/find_bench.cu
hash_table/static_multiset/insert_bench.cu)
static_multiset/contains_bench.cu
static_multiset/retrieve_bench.cu
static_multiset/count_bench.cu
static_multiset/find_bench.cu
static_multiset/insert_bench.cu)

###################################################################################################
# - static_multimap benchmarks --------------------------------------------------------------------
ConfigureBench(STATIC_MULTIMAP_BENCH
hash_table/static_multimap/insert_bench.cu
hash_table/static_multimap/retrieve_bench.cu
hash_table/static_multimap/query_bench.cu
hash_table/static_multimap/count_bench.cu)
static_multimap/insert_bench.cu
static_multimap/retrieve_bench.cu
static_multimap/query_bench.cu
static_multimap/count_bench.cu)

###################################################################################################
# - dynamic_map benchmarks ------------------------------------------------------------------------
ConfigureBench(DYNAMIC_MAP_BENCH
hash_table/dynamic_map/insert_bench.cu
hash_table/dynamic_map/find_bench.cu
hash_table/dynamic_map/contains_bench.cu
hash_table/dynamic_map/erase_bench.cu)
dynamic_map/insert_bench.cu
dynamic_map/find_bench.cu
dynamic_map/contains_bench.cu
dynamic_map/erase_bench.cu)

###################################################################################################
# - hash function benchmarks ----------------------------------------------------------------------
ConfigureBench(HASH_BENCH
hash_bench.cu)
ConfigureBench(HASH_FUNCTION_BENCH
hash_function/hash_function_bench.cu)

###################################################################################################
# - distinct_count_estimator benchmarks -----------------------------------------------------------
ConfigureBench(DISTINCT_COUNT_ESTIMATOR_BENCH
distinct_count_estimator_bench.cu)
# - hyperloglog benchmarks ------------------------------------------------------------------------
ConfigureBench(HYPERLOGLOG_BENCH
hyperloglog/hyperloglog_bench.cu)

###################################################################################################
# - bloom_filter benchmarks -----------------------------------------------------------------------
ConfigureBench(BLOOM_FILTER_BENCH
bloom_filter/add_bench.cu
bloom_filter/contains_bench.cu)
Loading

0 comments on commit f841fce

Please sign in to comment.