diff --git a/.devcontainer/cuda12.2-gcc12/devcontainer.json b/.devcontainer/cuda12.2-gcc12/devcontainer.json
new file mode 100644
index 000000000..199ce44f4
--- /dev/null
+++ b/.devcontainer/cuda12.2-gcc12/devcontainer.json
@@ -0,0 +1,39 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:23.08-cpp-gcc12-cuda12.2-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": true
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.2-gcc12"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd"
+ ],
+ "settings": {
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}/build/latest"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.2-gcc12"
+}
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000..84cfa82cc
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:23.08-cpp-gcc12-cuda12.2-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": true
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd"
+ ],
+ "settings": {
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}/build/latest"
+ ]
+ }
+ }
+ }
+}
\ No newline at end of file
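
For anyone who wants to verify the root configuration outside of VS Code, a minimal smoke test with the Dev Containers CLI might look like this (a sketch; it assumes the `devcontainer` tool from `@devcontainers/cli` is installed, which is not part of this PR):

```bash
# Sketch: bring up the root devcontainer and confirm the GPU is visible inside it.
npm install -g @devcontainers/cli
devcontainer up --workspace-folder .
devcontainer exec --workspace-folder . nvidia-smi
```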
diff --git a/.devcontainer/launch.sh b/.devcontainer/launch.sh
new file mode 100755
index 000000000..157a49bef
--- /dev/null
+++ b/.devcontainer/launch.sh
@@ -0,0 +1,58 @@
+#! /usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+launch_devcontainer() {
+
+ # Ensure we're in the repo root
+ cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )/..";
+
+ if [[ -z $1 ]] || [[ -z $2 ]]; then
+ echo "Usage: $0 [CUDA version] [Host compiler]"
+ echo "Example: $0 12.1 gcc12"
+ return 1
+ fi
+
+ local cuda_version="$1"
+ local host_compiler="$2"
+ local workspace="$(basename "$(pwd)")";
+ local tmpdir="$(mktemp -d)/${workspace}";
+ local path="$(pwd)/.devcontainer/cuda${cuda_version}-${host_compiler}";
+
+ mkdir -p "${tmpdir}";
+ mkdir -p "${tmpdir}/.devcontainer";
+ cp -arL "$path/devcontainer.json" "${tmpdir}/.devcontainer";
+ sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json";
+ path="${tmpdir}";
+
+ local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')";
+ local url="vscode://vscode-remote/dev-container+${hash}/home/coder/cuCollections";
+
+ echo "devcontainer URL: ${url}";
+
+ local launch="";
+ if type open >/dev/null 2>&1; then
+ launch="open";
+ elif type xdg-open >/dev/null 2>&1; then
+ launch="xdg-open";
+ fi
+
+ if [ -n "${launch}" ]; then
+ code --new-window "${tmpdir}";
+ exec "${launch}" "${url}" >/dev/null 2>&1;
+ fi
+}
+
+launch_devcontainer "$@";
\ No newline at end of file
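
As a usage note, launching the CUDA 12.2 + GCC 12 configuration added in this PR looks like the following (the script changes to the repository root itself; it needs `code` plus `open` or `xdg-open` on the PATH):

```bash
# Open the cuda12.2-gcc12 devcontainer in a new VS Code window.
./.devcontainer/launch.sh 12.2 gcc12
```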
diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh
new file mode 100755
index 000000000..700dc3713
--- /dev/null
+++ b/.devcontainer/make_devcontainers.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script parses the CI matrix.yml file and generates a devcontainer.json file for each unique combination of
+# CUDA version, compiler name/version, and Ubuntu version. Each generated devcontainer.json is written to a
+# subdirectory of .devcontainer named after its CUDA version and compiler name/version.
+# GitHub docs on using multiple devcontainer.json files:
+# https://docs.github.com/en/codespaces/setting-up-your-project-for-codespaces/adding-a-dev-container-configuration/introduction-to-dev-containers#devcontainerjson
+
+# Ensure the script is being executed in its containing directory
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
+
+# The root devcontainer.json file is used as a template for all other devcontainer.json files
+# by replacing the `image:` field with the appropriate image name
+base_devcontainer_file="./devcontainer.json"
+
+
+# Read matrix.yaml and convert it to json
+matrix_json=$(yq -o json ../ci/matrix.yml)
+
+
+# Get the devcontainer image version and define image tag root
+DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version')
+IMAGE_ROOT="rapidsai/devcontainers:${DEVCONTAINER_VERSION}-cpp-"
+
+# Get unique combinations of cuda version, compiler name/version, and Ubuntu version
+combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_version: .compiler.version, os: .os}] | unique | .[]')
+
+# For each unique combination
+for combination in $combinations; do
+ cuda_version=$(echo "$combination" | jq -r '.cuda')
+ compiler_name=$(echo "$combination" | jq -r '.compiler_name')
+ compiler_version=$(echo "$combination" | jq -r '.compiler_version')
+ os=$(echo "$combination" | jq -r '.os')
+
+ name="cuda$cuda_version-$compiler_name$compiler_version"
+ mkdir -p "$name"
+ devcontainer_file="$name/devcontainer.json"
+ image="$IMAGE_ROOT$compiler_name$compiler_version-cuda$cuda_version-$os"
+
+ # Use the root devcontainer.json as a template: plug in the image name and devcontainer name,
+ # and write the result to the new devcontainer.json file
+ jq --arg image "$image" --arg name "$name" '.image = $image | .name = $name | .containerEnv.DEVCONTAINER_NAME = $name' "$base_devcontainer_file" > "$devcontainer_file"
+
+ echo "Created $devcontainer_file"
+done
\ No newline at end of file
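
ci/matrix.yml itself is not part of this diff, so the following is only an illustration (made-up values) of what the `unique` query above emits; two rows that differ only in fields that are not projected (e.g. `gpu_build_archs`) collapse into a single devcontainer:

```bash
# Hypothetical two-row matrix; only the projected fields matter for de-duplication.
cat > /tmp/matrix.yml <<'EOF'
pull_request:
  nvcc:
    - {cuda: '12.2', os: 'ubuntu22.04', compiler: {name: 'gcc', version: '12'}, gpu_build_archs: '70'}
    - {cuda: '12.2', os: 'ubuntu22.04', compiler: {name: 'gcc', version: '12'}, gpu_build_archs: '80'}
EOF

yq -o json /tmp/matrix.yml |
  jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_version: .compiler.version, os: .os}] | unique | .[]'
# Prints a single combination:
# {"cuda":"12.2","compiler_name":"gcc","compiler_version":"12","os":"ubuntu22.04"}
```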
diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml
new file mode 100644
index 000000000..fbbe49b54
--- /dev/null
+++ b/.github/actions/compute-matrix/action.yml
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Compute Matrix
+description: "Compute the matrix for a given matrix type from the specified matrix file"
+
+inputs:
+ matrix_query:
+ description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc"
+ required: true
+ matrix_file:
+ description: 'The file containing the matrix'
+ required: true
+outputs:
+ matrix:
+ description: 'The requested matrix'
+ value: ${{ steps.compute-matrix.outputs.MATRIX }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute matrix
+ id: compute-matrix
+ run: |
+ MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh "${{inputs.matrix_file}}" "${{inputs.matrix_query}}")
+ echo "matrix=$MATRIX" | tee -a "$GITHUB_OUTPUT"
+ shell: bash -euxo pipefail {0}
\ No newline at end of file
diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh
new file mode 100755
index 000000000..64a6f5642
--- /dev/null
+++ b/.github/actions/compute-matrix/compute-matrix.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# Check for the correct number of arguments
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 MATRIX_FILE MATRIX_QUERY"
+ echo "MATRIX_FILE: The path to the matrix file."
+ echo "MATRIX_QUERY: The jq query used to specify the desired matrix. e.g., '.pull-request.nvcc'"
+ exit 1
+fi
+
+# Get realpath before changing directory
+MATRIX_FILE=$(realpath "$1")
+MATRIX_QUERY="$2"
+
+# Ensure the script is being executed in its containing directory
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
+
+echo "Input matrix file:" >&2
+cat "$MATRIX_FILE" >&2
+echo "Query: $MATRIX_QUERY" >&2
+yq -o=json "$MATRIX_FILE" | jq -c -r "$MATRIX_QUERY | map(. as \$o | {std: .std[]} + del(\$o.std))"
\ No newline at end of file
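
The trailing jq expression fans each matrix entry out into one object per C++ standard in its `std` list. On a toy input (not the real matrix file) it behaves as follows:

```bash
# Each element's `std` array is exploded; all other fields are copied unchanged.
echo '[{"cuda": "12.2", "os": "ubuntu22.04", "std": [17, 20]}]' |
  jq -c 'map(. as $o | {std: .std[]} + del($o.std))'
# -> [{"std":17,"cuda":"12.2","os":"ubuntu22.04"},{"std":20,"cuda":"12.2","os":"ubuntu22.04"}]
```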
diff --git a/.github/actions/configure_cccl_sccache/action.yml b/.github/actions/configure_cccl_sccache/action.yml
new file mode 100644
index 000000000..458669688
--- /dev/null
+++ b/.github/actions/configure_cccl_sccache/action.yml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Set up AWS credentials and environment variables for sccache
+description: "Set up AWS credentials and environment variables for sccache"
+runs:
+ using: "composite"
+ steps:
+ - name: Get AWS credentials for sccache bucket
+ uses: aws-actions/configure-aws-credentials@v2
+ with:
+ role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
+ aws-region: us-east-2
+ role-duration-seconds: 43200 # 12 hours
+ - name: Set environment variables
+ run: |
+ echo "SCCACHE_BUCKET=rapids-sccache-east" >> $GITHUB_ENV
+ echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV
+ echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
+ echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
+ echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
+ shell: bash
\ No newline at end of file
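
These variables only take effect once a build actually routes compilations through sccache. A sketch of how a build script might do that inside the container (the CMake launcher settings below are illustrative, not part of this PR):

```bash
# Illustrative: wire sccache in as the compiler launcher and check cache traffic.
cmake -S . -B build \
  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
  -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache
cmake --build build -j
sccache --show-stats   # hits/misses against the rapids-sccache-east bucket
```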
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 000000000..895ba83ee
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/workflows/add_to_project.yml b/.github/workflows/add_to_project.yml
deleted file mode 100644
index 72dd4acd2..000000000
--- a/.github/workflows/add_to_project.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Add new issue/PR to project
-
-on:
- issues:
- types:
- - opened
-
- pull_request_target:
- types:
- - opened
-
-jobs:
- add-to-project:
- name: Add issue or PR to project
- runs-on: ubuntu-latest
- steps:
- - name: Generate token
- id: generate_token
- uses: tibdex/github-app-token@36464acb844fc53b9b8b2401da68844f6b05ebb0
- with:
- app_id: ${{ secrets.CCCL_AUTH_APP_ID }}
- private_key: ${{ secrets.CCCL_AUTH_APP_PEM }}
- - name: Add to Project
- env:
- TOKEN: ${{ steps.generate_token.outputs.token }}
- uses: actions/add-to-project@v0.3.0
- with:
- project-url: https://github.com/orgs/NVIDIA/projects/6
- github-token: ${{ env.TOKEN }}
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
new file mode 100644
index 000000000..6599e9dcb
--- /dev/null
+++ b/.github/workflows/build-and-test.yml
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: build and test
+
+defaults:
+ run:
+ shell: bash -eo pipefail {0}
+
+on:
+ workflow_call:
+ inputs:
+ devcontainer_version: {type: string, required: true}
+ cuda_version: {type: string, required: true}
+ compiler: {type: string, required: true}
+ compiler_exe: {type: string, required: true}
+ compiler_version: {type: string, required: true}
+ std: {type: string, required: true}
+ gpu_build_archs: {type: string, required: true}
+ cpu: {type: string, required: true}
+ os: {type: string, required: true}
+ build_script: {type: string, required: false}
+ test_script: {type: string, required: false}
+ run_tests: {type: boolean, required: false, default: true}
+
+jobs:
+ devcontainer_image:
+ name: Devcontainer ${{ inputs.os }}/${{ inputs.compiler }}${{ inputs.compiler_version }}
+ runs-on: ubuntu-latest
+ outputs:
+ image_name: ${{ steps.compute-devcontainer-image-name.outputs.name }}
+ steps:
+ - name: Compute devcontainer image name
+ id: compute-devcontainer-image-name
+ run: |
+ COMPILER_SEGMENT=""
+ if [ "${{ inputs.compiler }}" != "cc" ] && [ "${{ inputs.compiler_exe }}" != "c++" ]; then
+ COMPILER_SEGMENT="${{ inputs.compiler }}${{ inputs.compiler_version }}-"
+ fi
+ DEVCONTAINER_IMAGE="rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${COMPILER_SEGMENT}cuda${{inputs.cuda_version}}-${{inputs.os}}"
+ echo "DEVCONTAINER_IMAGE=$DEVCONTAINER_IMAGE" >> $GITHUB_ENV
+ echo "name=$DEVCONTAINER_IMAGE" >> $GITHUB_OUTPUT
+ - name: Check if devcontainer image exists
+ run: |
+ if ! docker buildx imagetools inspect "$DEVCONTAINER_IMAGE" > /dev/null; then
+ echo "Error: Docker image $DEVCONTAINER_IMAGE does not exist."
+ exit 1
+ fi
+
+ build:
+ needs: devcontainer_image
+ if: inputs.build_script != '' && needs.devcontainer_image.outputs.image_name != ''
+ name: Build ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}/SM${{inputs.gpu_build_archs}}
+ uses: ./.github/workflows/run-as-coder.yml
+ with:
+ name: Build ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}/SM${{inputs.gpu_build_archs}}
+ runner: linux-${{inputs.cpu}}-cpu16
+ image: ${{ needs.devcontainer_image.outputs.image_name }}
+ command: |
+ ${{ inputs.build_script }} "${{inputs.compiler_exe}}" "${{inputs.std}}" "${{inputs.gpu_build_archs}}"
+
+ test:
+ needs: [devcontainer_image, build]
+ if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.test_script != '' && needs.devcontainer_image.outputs.image_name != '' && inputs.run_tests}}
+ name: Test ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}/SM${{inputs.gpu_build_archs}}
+ uses: ./.github/workflows/run-as-coder.yml
+ with:
+ name: Test ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}/SM${{inputs.gpu_build_archs}}
+ runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
+ image: ${{ needs.devcontainer_image.outputs.image_name }}
+ command: |
+ nvidia-smi
+ ${{ inputs.test_script }} "${{inputs.compiler_exe}}" "${{inputs.std}}" "${{inputs.gpu_build_archs}}"
\ No newline at end of file
diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
new file mode 100644
index 000000000..dea71e00e
--- /dev/null
+++ b/.github/workflows/dispatch-build-and-test.yml
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Dispatch build and test
+
+on:
+ workflow_call:
+ inputs:
+ per_cuda_compiler_matrix: {type: string, required: true}
+ build_script: {type: string, required: false}
+ test_script: {type: string, required: false}
+ devcontainer_version: {type: string, required: true}
+
+jobs:
+ # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
+ # ensures that the build/test steps can overlap across different configurations. For example,
+ # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
+ build_and_test:
+ name: ${{matrix.cpu}}
+ uses: ./.github/workflows/build-and-test.yml
+ strategy:
+ fail-fast: false
+ matrix:
+ include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+ with:
+ devcontainer_version: ${{ inputs.devcontainer_version }}
+ cuda_version: ${{ matrix.cuda }}
+ compiler: ${{ matrix.compiler.name }}
+ compiler_exe: ${{ matrix.compiler.exe }}
+ compiler_version: ${{ matrix.compiler.version }}
+ std: ${{ matrix.std }}
+ gpu_build_archs: ${{ matrix.gpu_build_archs }}
+ cpu: ${{ matrix.cpu }}
+ os: ${{ matrix.os }}
+ build_script: ${{ inputs.build_script }}
+ test_script: ${{ inputs.test_script }}
+ run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }}
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
new file mode 100644
index 000000000..061b30a99
--- /dev/null
+++ b/.github/workflows/pr.yml
@@ -0,0 +1,121 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the main workflow. It runs on every push to main, dev, and the copy-pr-bot "pull-request/[0-9]+" branches, i.e., on every PR.
+name: pr
+
+defaults:
+ run:
+ shell: bash -euo pipefail {0}
+
+on:
+ push:
+ branches:
+ - main
+ - dev
+ - "pull-request/[0-9]+"
+
+# Run only one instance of this workflow at a time for a given ref, cancelling any in-progress run when a new one starts.
+concurrency:
+ group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }}
+ cancel-in-progress: true
+
+jobs:
+ doxygen-check:
+ name: Doxygen check
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ - name: Install Doxygen
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -y doxygen
+ - name: Check Doxygen docs
+ run: |
+ if ! ./ci/pre-commit/doxygen.sh; then
+ echo "Doxygen check failed"
+ exit 1
+ fi
+ shell: bash -euxo pipefail {0}
+
+ get-devcontainer-version:
+ name: Get devcontainer version
+ runs-on: ubuntu-latest
+ outputs:
+ DEVCONTAINER_VERSION: ${{ steps.set-outputs.outputs.DEVCONTAINER_VERSION }}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ - name: Get devcontainer version
+ id: set-outputs
+ run: |
+ DEVCONTAINER_VERSION=$(yq -o json ci/matrix.yml | jq -r '.devcontainer_version')
+ echo "DEVCONTAINER_VERSION=$DEVCONTAINER_VERSION" | tee -a "$GITHUB_OUTPUT"
+
+ compute-nvcc-matrix:
+ name: Compute NVCC matrix
+ runs-on: ubuntu-latest
+ outputs:
+ FULL_MATRIX: ${{ steps.set-outputs.outputs.FULL_MATRIX }}
+ CUDA_VERSIONS: ${{ steps.set-outputs.outputs.CUDA_VERSIONS }}
+ HOST_COMPILERS: ${{ steps.set-outputs.outputs.HOST_COMPILERS }}
+ PER_CUDA_COMPILER_MATRIX: ${{ steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX }}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ - name: Get full nvcc matrix
+ id: compute-nvcc-matrix
+ uses: ./.github/actions/compute-matrix
+ with:
+ matrix_file: './ci/matrix.yml'
+ matrix_query: '.pull_request.nvcc'
+ - name: Set outputs
+ id: set-outputs
+ run: |
+ FULL_MATRIX='${{steps.compute-nvcc-matrix.outputs.matrix}}'
+ echo "FULL_MATRIX=$FULL_MATRIX" | tee -a "$GITHUB_OUTPUT"
+ CUDA_VERSIONS=$(echo $FULL_MATRIX | jq -c '[.[] | .cuda] | unique')
+ echo "CUDA_VERSIONS=$CUDA_VERSIONS" | tee -a "$GITHUB_OUTPUT"
+ HOST_COMPILERS=$(echo $FULL_MATRIX | jq -c '[.[] | .compiler.name] | unique')
+ echo "HOST_COMPILERS=$HOST_COMPILERS" | tee -a "$GITHUB_OUTPUT"
+ PER_CUDA_COMPILER_MATRIX=$(echo $FULL_MATRIX | jq -c ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')
+ echo "PER_CUDA_COMPILER_MATRIX=$PER_CUDA_COMPILER_MATRIX" | tee -a "$GITHUB_OUTPUT"
+
+ ci:
+ name: CUDA${{ matrix.cuda_version }} ${{ matrix.compiler }}
+ needs: [compute-nvcc-matrix, get-devcontainer-version]
+ uses: ./.github/workflows/dispatch-build-and-test.yml
+ strategy:
+ fail-fast: false
+ matrix:
+ cuda_version: ${{ fromJSON(needs.compute-nvcc-matrix.outputs.CUDA_VERSIONS) }}
+ compiler: ${{ fromJSON(needs.compute-nvcc-matrix.outputs.HOST_COMPILERS) }}
+ with:
+ per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-nvcc-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
+ build_script: "./ci/build.sh"
+ test_script: "./ci/test.sh"
+ devcontainer_version: ${{ needs.get-devcontainer-version.outputs.DEVCONTAINER_VERSION }}
+
+ # This job is the final job that runs after all other jobs and is used for branch protection status checks.
+ # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks
+ ci-success:
+ runs-on: ubuntu-latest
+ name: CI success
+ needs:
+ - ci
+ steps:
+ - run: echo "CI success"
\ No newline at end of file
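
The `PER_CUDA_COMPILER_MATRIX` expression groups the flat nvcc matrix by CUDA version and host compiler so each `ci` job can pull out just its slice. With toy data (illustrative only) it works like this:

```bash
# Group a two-entry matrix by "<cuda><compiler.name>" and key the result by "<cuda>-<compiler.name>".
echo '[{"cuda":"12.2","compiler":{"name":"gcc"},"std":17},
       {"cuda":"12.2","compiler":{"name":"gcc"},"std":20}]' |
  jq -c 'group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add'
# -> {"12.2-gcc":[{"cuda":"12.2","compiler":{"name":"gcc"},"std":17},
#                 {"cuda":"12.2","compiler":{"name":"gcc"},"std":20}]}
# The ci job then selects its slice with format('{0}-{1}', matrix.cuda_version, matrix.compiler).
```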
diff --git a/.github/workflows/run-as-coder.yml b/.github/workflows/run-as-coder.yml
new file mode 100644
index 000000000..573ef134a
--- /dev/null
+++ b/.github/workflows/run-as-coder.yml
@@ -0,0 +1,66 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Run as coder user
+
+defaults:
+ run:
+ shell: bash -exo pipefail {0}
+
+
+on:
+ workflow_call:
+ inputs:
+ name: {type: string, required: true}
+ image: {type: string, required: true}
+ runner: {type: string, required: true}
+ command: {type: string, required: true}
+ env: { type: string, required: false, default: "" }
+
+jobs:
+ run-as-coder:
+ name: ${{inputs.name}}
+ runs-on: ${{inputs.runner}}
+ container:
+ options: -u root
+ image: ${{inputs.image}}
+ env:
+ NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+ permissions:
+ id-token: write
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ with:
+ path: cuCollections
+ persist-credentials: false
+ - name: Move files to coder user home directory
+ run: |
+ cp -R cuCollections /home/coder/cuCollections
+ chown -R coder:coder /home/coder/
+ - name: Configure credentials and environment variables for sccache
+ uses: ./cuCollections/.github/actions/configure_cccl_sccache
+ - name: Run command
+ shell: su coder {0}
+ run: |
+ set -exo pipefail
+ cd ~/cuCollections
+ eval "${{inputs.command}}" || exit_code=$?
+ if [ ! -z "$exit_code" ]; then
+ echo "::error::Error! To checkout the corresponding code and reproduce locally, run the following commands:"
+ echo "git clone --branch $GITHUB_REF_NAME --single-branch --recurse-submodules https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
+ echo "docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}"
+ exit $exit_code
+ fi
diff --git a/.gitignore b/.gitignore
index 4146530ed..6ccf378c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,6 @@ __pycache__
*.dylib
.cache
.vscode
-.devcontainer
*.code-workspace
*.swp
*.pytest_cache
@@ -140,3 +139,6 @@ ENV/
# clang
compile_commands.json
+
+# figures
+*.eps
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2fe04169..5679bf67f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,7 +20,7 @@ repos:
hooks:
- id: doxygen-check
name: doxygen-check
- entry: ./ci/checks/doxygen.sh
+ entry: ./ci/pre-commit/doxygen.sh
files: ^include/
types_or: [file]
language: system
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1b5055d9..f3ca85a8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
#=============================================================================
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,10 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================
-cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
- file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/RAPIDS.cmake
${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
endif()
include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
diff --git a/README.md b/README.md
index dc8d4db80..93ac04027 100644
--- a/README.md
+++ b/README.md
@@ -5,13 +5,13 @@
Doxygen Documentation (TODO) |
-`cuCollections` (`cuco`) is an open-source, header-only library of GPU-accelerated, concurrent data structures.
+`cuCollections` (`cuco`) is an open-source, header-only library of GPU-accelerated, concurrent data structures.
-Similar to how [Thrust](https://github.com/thrust/thrust) and [CUB](https://github.com/thrust/cub) provide STL-like, GPU accelerated algorithms and primitives, `cuCollections` provides STL-like concurrent data structures. `cuCollections` is not a one-to-one, drop-in replacement for STL data structures like `std::unordered_map`. Instead, it provides functionally similar data structures tailored for efficient use with GPUs.
+Similar to how [Thrust](https://github.com/thrust/thrust) and [CUB](https://github.com/thrust/cub) provide STL-like, GPU accelerated algorithms and primitives, `cuCollections` provides STL-like concurrent data structures. `cuCollections` is not a one-to-one, drop-in replacement for STL data structures like `std::unordered_map`. Instead, it provides functionally similar data structures tailored for efficient use with GPUs.
## Development Status
-`cuCollections` is still under heavy development. Users should expect breaking changes and refactoring to be common.
+`cuCollections` is still under heavy development. Users should expect breaking changes and refactoring to be common.
## Getting cuCollections
@@ -21,14 +21,14 @@ Similar to how [Thrust](https://github.com/thrust/thrust) and [CUB](https://gith
`cuCollections` is designed to make it easy to include within another CMake project.
The `CMakeLists.txt` exports a `cuco` target that can be linked[1](#link-footnote)
- into a target to setup include directories, dependencies, and compile flags necessary to use `cuCollections` in your project.
+ into a target to setup include directories, dependencies, and compile flags necessary to use `cuCollections` in your project.
We recommend using [CMake Package Manager (CPM)](https://github.com/TheLartians/CPM.cmake) to fetch `cuCollections` into your project.
With CPM, getting `cuCollections` is easy:
-```
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+```cmake
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
include(path/to/CPM.cmake)
@@ -47,12 +47,12 @@ target_link_libraries(my_library cuco)
This will take care of downloading `cuCollections` from GitHub and making the headers available in a location that can be found by CMake. Linking against the `cuco` target will provide everything needed for `cuco` to be used by the `my_library` target.
-1: `cuCollections` is header-only and therefore there is no binary component to "link" against. The linking terminology comes from CMake's `target_link_libraries` which is still used even for header-only library targets.
+1: `cuCollections` is header-only and therefore there is no binary component to "link" against. The linking terminology comes from CMake's `target_link_libraries` which is still used even for header-only library targets.
## Requirements
-- `nvcc 11+`
+- `nvcc 11.5+`
- C++17
-- Volta+
+- Volta+
- Pascal is partially supported. Any data structures that require blocking algorithms are not supported. See [libcu++](https://nvidia.github.io/libcudacxx/setup/requirements.html#device-architectures) documentation for more details.
## Dependencies
@@ -67,15 +67,15 @@ No action is required from the user to satisfy these dependencies. `cuCollection
## Building cuCollections
-Since `cuCollections` is header-only, there is nothing to build to use it.
+Since `cuCollections` is header-only, there is nothing to build to use it.
To build the tests, benchmarks, and examples:
-```
+```bash
cd $CUCO_ROOT
mkdir -p build
cd build
-cmake ..
+cmake ..
make
```
Binaries will be built into:
@@ -179,23 +179,32 @@ class example_class {
## Data Structures
-We plan to add many GPU-accelerated, concurrent data structures to `cuCollections`. As of now, the two flagships are variants of hash tables.
+We plan to add many GPU-accelerated, concurrent data structures to `cuCollections`. As of now, the two flagships are variants of hash tables.
+
+### `static_set`
+
+`cuco::static_set` is a fixed-size container that stores unique elements in no particular order. See the Doxygen documentation in `static_set.cuh` for more detailed information.
+
+#### Examples:
+- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/Pzf6vabz1))
+- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/sfG3qKqGv))
### `static_map`
`cuco::static_map` is a fixed-size hash table using open addressing with linear probing. See the Doxygen documentation in `static_map.cuh` for more detailed information.
#### Examples:
-- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/ervPzqh64))
-- [Device-view APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_view_example.cu) (see [live example in godbolt](https://godbolt.org/z/qMWrfE6ET))
-- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/oGfYjzMGT))
+- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/T49P85Mnd))
+- [Device-view APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_view_example.cu) (see [live example in godbolt](https://godbolt.org/z/dh8bMn3G1))
+- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/7djKevK6e))
+- [Key histogram](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/count_by_key_example.cu) (see [live example in godbolt](https://godbolt.org/z/vecGeYM48))
### `static_multimap`
`cuco::static_multimap` is a fixed-size hash table that supports storing equivalent keys. It uses double hashing by default and supports switching to linear probing. See the Doxygen documentation in `static_multimap.cuh` for more detailed information.
#### Examples:
-- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_multimap/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/Po4eTEn1a))
+- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_multimap/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/PrbqG6ae4))
### `dynamic_map`
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index a037dc603..3635336e8 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
#=============================================================================
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,20 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================
-cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-
-CPMAddPackage(
- NAME benchmark
- GITHUB_REPOSITORY google/benchmark
- VERSION 1.5.2
- OPTIONS
- "BENCHMARK_ENABLE_TESTING Off"
- # The REGEX feature test fails when gbench's cmake is run under CPM w/ gcc5.4 because it doesn't assume C++11
- # Additionally, attempting to set the CMAKE_CXX_VERSION here doesn't propogate to the feature test build
- # Therefore, we just disable the feature test and assume platforms we care about have a regex impl available
- "RUN_HAVE_STD_REGEX 0" #
- "BENCHMARK_ENABLE_INSTALL OFF"
-)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
CPMAddPackage(
NAME nvbench
@@ -41,65 +28,58 @@ CPMAddPackage(
###################################################################################################
###################################################################################################
-function(ConfigureBench BENCH_NAME BENCH_SRC)
- add_executable(${BENCH_NAME} "${BENCH_SRC}")
- set_target_properties(${BENCH_NAME} PROPERTIES
- POSITION_INDEPENDENT_CODE ON
- RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks")
- target_include_directories(${BENCH_NAME} PRIVATE
- "${CMAKE_CURRENT_SOURCE_DIR}")
- target_compile_options(${BENCH_NAME} PRIVATE --compiler-options=-Wall --compiler-options=-Wextra
- --expt-extended-lambda --expt-relaxed-constexpr -Xcompiler -Wno-subobject-linkage)
- target_link_libraries(${BENCH_NAME} PRIVATE
- benchmark benchmark_main
- pthread
- cuco
- CUDA::cudart)
-endfunction(ConfigureBench)
-
-###################################################################################################
-function(ConfigureNVBench BENCH_NAME)
+function(ConfigureBench BENCH_NAME)
add_executable(${BENCH_NAME} ${ARGN})
set_target_properties(${BENCH_NAME} PROPERTIES
POSITION_INDEPENDENT_CODE ON
- RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/nvbenchmarks")
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
target_include_directories(${BENCH_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}")
- #"${NVBench_SOURCE_DIR}")
- target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
+ target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -lineinfo)
target_link_libraries(${BENCH_NAME} PRIVATE
nvbench::main
pthread
cuco)
-endfunction(ConfigureNVBench)
+endfunction(ConfigureBench)
###################################################################################################
### benchmark sources #############################################################################
###################################################################################################
###################################################################################################
-# - dynamic_map benchmarks ------------------------------------------------------------------------
-set(DYNAMIC_MAP_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/hash_table/dynamic_map_bench.cu")
-ConfigureBench(DYNAMIC_MAP_BENCH "${DYNAMIC_MAP_BENCH_SRC}")
+# - static_set benchmarks -------------------------------------------------------------------------
+ConfigureBench(STATIC_SET_BENCH
+ hash_table/static_set/contains_bench.cu
+ hash_table/static_set/find_bench.cu
+ hash_table/static_set/insert_bench.cu
+ hash_table/static_set/retrieve_all_bench.cu
+ hash_table/static_set/size_bench.cu)
###################################################################################################
# - static_map benchmarks -------------------------------------------------------------------------
-set(STATIC_MAP_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/hash_table/static_map_bench.cu")
-ConfigureBench(STATIC_MAP_BENCH "${STATIC_MAP_BENCH_SRC}")
+ConfigureBench(STATIC_MAP_BENCH
+ hash_table/static_map/insert_bench.cu
+ hash_table/static_map/find_bench.cu
+ hash_table/static_map/contains_bench.cu
+ hash_table/static_map/erase_bench.cu)
###################################################################################################
# - static_multimap benchmarks --------------------------------------------------------------------
-ConfigureNVBench(STATIC_MULTIMAP_BENCH
- hash_table/static_multimap/count_bench.cu
+ConfigureBench(STATIC_MULTIMAP_BENCH
hash_table/static_multimap/insert_bench.cu
- hash_table/static_multimap/pair_retrieve_bench.cu
+ hash_table/static_multimap/retrieve_bench.cu
hash_table/static_multimap/query_bench.cu
- hash_table/static_multimap/retrieve_bench.cu)
+ hash_table/static_multimap/count_bench.cu)
-ConfigureNVBench(RETRIEVE_BENCH
- hash_table/static_multimap/optimal_retrieve_bench.cu)
+###################################################################################################
+# - dynamic_map benchmarks ------------------------------------------------------------------------
+ConfigureBench(DYNAMIC_MAP_BENCH
+ hash_table/dynamic_map/insert_bench.cu
+ hash_table/dynamic_map/find_bench.cu
+ hash_table/dynamic_map/contains_bench.cu
+ hash_table/dynamic_map/erase_bench.cu)
###################################################################################################
-# - reduce_by_key benchmarks ----------------------------------------------------------------------
-set(RBK_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/reduce_by_key/reduce_by_key.cu")
-ConfigureBench(RBK_BENCH "${RBK_BENCH_SRC}")
+# - hash function benchmarks ----------------------------------------------------------------------
+ConfigureBench(HASH_BENCH
+ hash_bench.cu)
diff --git a/benchmarks/defaults.hpp b/benchmarks/defaults.hpp
new file mode 100644
index 000000000..22e4f5338
--- /dev/null
+++ b/benchmarks/defaults.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <nvbench/nvbench.cuh>
+
+#include <cstdint>
+#include <vector>
+
+namespace cuco::benchmark::defaults {
+
+using KEY_TYPE_RANGE = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
+using VALUE_TYPE_RANGE = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
+
+auto constexpr N = 100'000'000;
+auto constexpr OCCUPANCY = 0.5;
+auto constexpr MULTIPLICITY = 8;
+auto constexpr MATCHING_RATE = 0.5;
+auto constexpr MAX_NOISE = 3;
+auto constexpr SKEW = 0.5;
+auto constexpr BATCH_SIZE = 1'000'000;
+auto constexpr INITIAL_SIZE = 50'000'000;
+
+auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000);
+auto const N_RANGE_CACHE =
+ std::vector<nvbench::int64_t>{8'000, 80'000, 800'000, 8'000'000, 80'000'000};
+auto const OCCUPANCY_RANGE = nvbench::range(0.1, 0.9, 0.1);
+auto const MULTIPLICITY_RANGE = std::vector<nvbench::int64_t>{1, 2, 4, 8, 16};
+auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1);
+auto const SKEW_RANGE = nvbench::range(0.1, 1., 0.1);
+
+} // namespace cuco::benchmark::defaults
diff --git a/benchmarks/hash_bench.cu b/benchmarks/hash_bench.cu
new file mode 100644
index 000000000..ec35c186e
--- /dev/null
+++ b/benchmarks/hash_bench.cu
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+
+#include <cuco/hash_functions.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+
+#include <cstdint>
+
+template <int32_t Words>
+struct large_key {
+ constexpr __host__ __device__ large_key(int32_t seed) noexcept
+ {
+#pragma unroll Words
+ for (int32_t i = 0; i < Words; ++i) {
+ data_[i] = seed;
+ }
+ }
+
+ private:
+ int32_t data_[Words];
+};
+
+template <int32_t BlockSize, typename Hasher, typename OutputIt>
+__global__ void hash_bench_kernel(Hasher hash,
+ cuco::detail::index_type n,
+ OutputIt out,
+ bool materialize_result)
+{
+ cuco::detail::index_type const gid = BlockSize * blockIdx.x + threadIdx.x;
+ cuco::detail::index_type const loop_stride = gridDim.x * BlockSize;
+ cuco::detail::index_type idx = gid;
+ typename Hasher::result_type agg = 0;
+
+ while (idx < n) {
+ typename Hasher::argument_type key(idx);
+ for (int32_t i = 0; i < 100; ++i) { // execute hash func 100 times
+ agg += hash(key);
+ }
+ idx += loop_stride;
+ }
+
+ if (materialize_result) { out[gid] = agg; }
+}
+
+/**
+ * @brief A benchmark evaluating performance of various hash functions
+ */
+template <typename Hash>
+void hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
+{
+ bool const materialize_result = false;
+ constexpr auto block_size = 128;
+ auto const num_keys = state.get_int64_or_default("NumInputs", cuco::benchmark::defaults::N * 10);
+ auto const grid_size = (num_keys + block_size * 16 - 1) / (block_size * 16);
+
+ thrust::device_vector<typename Hash::result_type> hash_values((materialize_result) ? num_keys
+ : 1);
+
+ state.add_element_count(num_keys);
+
+ state.exec([&](nvbench::launch& launch) {
+ hash_bench_kernel<block_size><<<grid_size, block_size, 0, launch.get_stream()>>>(
+ Hash{}, num_keys, hash_values.begin(), materialize_result);
+ });
+}
+
+NVBENCH_BENCH_TYPES(
+ hash_eval,
+ NVBENCH_TYPE_AXES(nvbench::type_list<cuco::murmurhash3_32<nvbench::int32_t>,
+ cuco::murmurhash3_32<nvbench::int64_t>,
+ cuco::murmurhash3_32<large_key<32>>, // 32*4bytes
+ cuco::xxhash_32<nvbench::int32_t>,
+ cuco::xxhash_32<nvbench::int64_t>,
+ cuco::xxhash_32<large_key<32>>,
+ cuco::xxhash_64<nvbench::int32_t>,
+ cuco::xxhash_64<nvbench::int64_t>,
+ cuco::xxhash_64<large_key<32>>,
+ cuco::murmurhash3_fmix_32<nvbench::int32_t>,
+ cuco::murmurhash3_fmix_64<nvbench::int64_t>>))
+ .set_name("hash_function_eval")
+ .set_type_axes_names({"Hash"})
+ .set_max_noise(cuco::benchmark::defaults::MAX_NOISE);
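
With the benchmark CMake changes in this PR, the nvbench executables land in `build/benchmarks`. A sketch of building and running the new hash benchmark (the `BUILD_BENCHMARKS` option and the nvbench CLI flags are assumptions, not shown in this diff):

```bash
# Configure with benchmarks enabled, build the hash benchmark, and run it on GPU 0.
cmake -S . -B build -DBUILD_BENCHMARKS=ON
cmake --build build -j --target HASH_BENCH
./build/benchmarks/HASH_BENCH --benchmark hash_function_eval --devices 0
```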
diff --git a/benchmarks/hash_table/dynamic_map/contains_bench.cu b/benchmarks/hash_table/dynamic_map/contains_bench.cu
new file mode 100644
index 000000000..ff349bc53
--- /dev/null
+++ b/benchmarks/hash_table/dynamic_map/contains_bench.cu
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::dynamic_map::contains` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_contains(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const initial_size = state.get_int64_or_default("InitSize", defaults::INITIAL_SIZE);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ cuco::dynamic_map<Key, Value> map{
+ static_cast<std::size_t>(initial_size), cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
+ map.insert(pairs.begin(), pairs.end());
+
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ thrust::device_vector<bool> result(num_keys);
+
+ state.add_element_count(num_keys);
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ map.contains(keys.begin(), keys.end(), result.begin(), {}, {}, launch.get_stream());
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> dynamic_map_contains(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(dynamic_map_contains,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_contains_unique_num_inputs")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("NumInputs", defaults::N_RANGE);
+
+NVBENCH_BENCH_TYPES(dynamic_map_contains,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_contains_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/dynamic_map/erase_bench.cu b/benchmarks/hash_table/dynamic_map/erase_bench.cu
new file mode 100644
index 000000000..96f5ec7ec
--- /dev/null
+++ b/benchmarks/hash_table/dynamic_map/erase_bench.cu
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::dynamic_map::erase` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_erase(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const initial_size = state.get_int64_or_default("InitSize", defaults::INITIAL_SIZE);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(
+ keys.begin(), keys.end(), pairs.begin(), [] __device__(auto i) { return pair_type(i, {}); });
+
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ state.add_element_count(num_keys);
+
+ state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+ [&](nvbench::launch& launch, auto& timer) {
+ // dynamic map with erase support
+ cuco::dynamic_map<Key, Value> map{static_cast<std::size_t>(initial_size),
+ cuco::empty_key<Key>{-1},
+ cuco::empty_value<Value>{-1},
+ cuco::erased_key<Key>{-2}};
+ map.insert(pairs.begin(), pairs.end(), {}, {}, launch.get_stream());
+
+ timer.start();
+ map.erase(keys.begin(), keys.end(), {}, {}, launch.get_stream());
+ timer.stop();
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> dynamic_map_erase(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(dynamic_map_erase,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_erase_unique_num_inputs")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("NumInputs", defaults::N_RANGE);
+
+NVBENCH_BENCH_TYPES(dynamic_map_erase,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_erase_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/dynamic_map/find_bench.cu b/benchmarks/hash_table/dynamic_map/find_bench.cu
new file mode 100644
index 000000000..b06cfab4e
--- /dev/null
+++ b/benchmarks/hash_table/dynamic_map/find_bench.cu
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::dynamic_map::find` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_find(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const initial_size = state.get_int64_or_default("InitSize", defaults::INITIAL_SIZE);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ cuco::dynamic_map<Key, Value> map{
+ static_cast<std::size_t>(initial_size), cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
+ map.insert(pairs.begin(), pairs.end());
+
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ thrust::device_vector<Value> result(num_keys);
+
+ state.add_element_count(num_keys);
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ map.find(keys.begin(), keys.end(), result.begin(), {}, {}, launch.get_stream());
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> dynamic_map_find(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(dynamic_map_find,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_find_unique_num_inputs")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("NumInputs", defaults::N_RANGE);
+
+NVBENCH_BENCH_TYPES(dynamic_map_find,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_find_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/dynamic_map/insert_bench.cu b/benchmarks/hash_table/dynamic_map/insert_bench.cu
new file mode 100644
index 000000000..8e8cc8a84
--- /dev/null
+++ b/benchmarks/hash_table/dynamic_map/insert_bench.cu
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark_defaults.hpp>
+#include <benchmark_utils.hpp>
+
+#include <cuco/dynamic_map.cuh>
+#include <cuco/utility/key_generator.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::dynamic_map::insert` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_insert(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const initial_size = state.get_int64_or_default("InitSize", defaults::INITIAL_SIZE);
+ auto const batch_size = state.get_int64_or_default("BatchSize", defaults::BATCH_SIZE);
+
+ if (num_keys % batch_size) { state.skip("NumInputs must be divisible by BatchSize."); }
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ state.add_element_count(num_keys);
+
+ state.exec(
+ nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+ cuco::dynamic_map<Key, Value> map{static_cast<std::size_t>(initial_size),
+ cuco::empty_key<Key>{-1},
+ cuco::empty_value<Value>{-1},
+ {},
+ launch.get_stream()};
+
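+ // The map is constructed outside the timed region; only the batched inserts below are measured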
+ timer.start();
+ for (std::size_t i = 0; i < num_keys; i += batch_size) {
+ map.insert(pairs.begin() + i, pairs.begin() + i + batch_size, {}, {}, launch.get_stream());
+ }
+ timer.stop();
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> dynamic_map_insert(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(dynamic_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("dynamic_map_insert_unique_num_inputs")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("NumInputs", defaults::N_RANGE);
+
+NVBENCH_BENCH_TYPES(dynamic_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::uniform>))
+ .set_name("dynamic_map_insert_uniform_multiplicity")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("Multiplicity", defaults::MULTIPLICITY_RANGE);
+
+NVBENCH_BENCH_TYPES(dynamic_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::gaussian>))
+ .set_name("dynamic_map_insert_gaussian_skew")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Skew", defaults::SKEW_RANGE);
diff --git a/benchmarks/hash_table/dynamic_map_bench.cu b/benchmarks/hash_table/dynamic_map_bench.cu
deleted file mode 100644
index 90446ea57..000000000
--- a/benchmarks/hash_table/dynamic_map_bench.cu
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-
-#include
-
-#include
-
-#include
-#include
-
-enum class dist_type { UNIQUE, UNIFORM, GAUSSIAN };
-
-template <dist_type Dist, typename Key, typename OutputIt>
-static void generate_keys(OutputIt output_begin, OutputIt output_end)
-{
- auto num_keys = std::distance(output_begin, output_end);
-
- std::random_device rd;
- std::mt19937 gen{rd()};
-
- switch (Dist) {
- case dist_type::UNIQUE:
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = i;
- }
- break;
- case dist_type::UNIFORM:
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = std::abs(static_cast<Key>(gen()));
- }
- break;
- case dist_type::GAUSSIAN:
- std::normal_distribution<> dg{1e9, 1e7};
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = std::abs(static_cast<Key>(dg(gen)));
- }
- break;
- }
-}
-
-static void gen_final_size(benchmark::internal::Benchmark* b)
-{
- for (auto size = 10'000'000; size <= 150'000'000; size += 20'000'000) {
- b->Args({size});
- }
-}
-
-template <typename Key, typename Value, dist_type Dist>
-static void BM_dynamic_insert(::benchmark::State& state)
-{
- using map_type = cuco::dynamic_map<Key, Value>;
-
- std::size_t num_keys = state.range(0);
- std::size_t initial_size = 1 << 27;
-
- std::vector<Key> h_keys(num_keys);
- std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
-
- generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
-
- for (std::size_t i = 0; i < num_keys; ++i) {
- Key key = h_keys[i];
- Value val = h_keys[i];
- h_pairs[i].first = key;
- h_pairs[i].second = val;
- }
-
- thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
-
- std::size_t batch_size = 1E6;
- for (auto _ : state) {
- map_type map{
- initial_size, cuco::sentinel::empty_key<Key>{-1}, cuco::sentinel::empty_value<Value>{-1}};
- {
- cuda_event_timer raii{state};
- for (std::size_t i = 0; i < num_keys; i += batch_size) {
- map.insert(d_pairs.begin() + i, d_pairs.begin() + i + batch_size);
- }
- }
- }
-
- state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
- int64_t(state.range(0)));
-}
-
-template <typename Key, typename Value, dist_type Dist>
-static void BM_dynamic_search_all(::benchmark::State& state)
-{
- using map_type = cuco::dynamic_map<Key, Value>;
-
- std::size_t num_keys = state.range(0);
- std::size_t initial_size = 1 << 27;
-
- std::vector<Key> h_keys(num_keys);
- std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
-
- generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
-
- for (std::size_t i = 0; i < num_keys; ++i) {
- Key key = h_keys[i];
- Value val = h_keys[i];
- h_pairs[i].first = key;
- h_pairs[i].second = val;
- }
-
- thrust::device_vector<Key> d_keys(h_keys);
- thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
- thrust::device_vector<Value> d_results(num_keys);
-
- map_type map{
- initial_size, cuco::sentinel::empty_key<Key>{-1}, cuco::sentinel::empty_value<Value>{-1}};
- map.insert(d_pairs.begin(), d_pairs.end());
-
- for (auto _ : state) {
- cuda_event_timer raii{state};
- map.find(d_keys.begin(), d_keys.end(), d_results.begin());
- }
-
- state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
- int64_t(state.range(0)));
-}
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIQUE)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIQUE)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIFORM)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIFORM)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::GAUSSIAN)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::GAUSSIAN)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIQUE)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIQUE)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIFORM)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIFORM)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::GAUSSIAN)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
-
-BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::GAUSSIAN)
- ->Unit(benchmark::kMillisecond)
- ->Apply(gen_final_size)
- ->UseManualTime();
diff --git a/benchmarks/hash_table/static_map/contains_bench.cu b/benchmarks/hash_table/static_map/contains_bench.cu
new file mode 100644
index 000000000..0b5d482a1
--- /dev/null
+++ b/benchmarks/hash_table/static_map/contains_bench.cu
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark_defaults.hpp>
+#include <benchmark_utils.hpp>
+
+#include <cuco/static_map.cuh>
+#include <cuco/utility/key_generator.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::static_map::contains` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_contains(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const occupancy = state.get_float64_or_default("Occupancy", defaults::OCCUPANCY);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ std::size_t const size = num_keys / occupancy;
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ cuco::static_map<Key, Value> map{size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
+ map.insert(pairs.begin(), pairs.end());
+
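+ // Drop a fraction of the query keys so that only ~MatchingRate of the contains queries hit keys in the map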
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ thrust::device_vector<bool> result(num_keys);
+
+ state.add_element_count(num_keys);
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ map.contains(keys.begin(), keys.end(), result.begin(), {}, {}, launch.get_stream());
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> static_map_contains(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(static_map_contains,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_contains_unique_occupancy")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
+
+NVBENCH_BENCH_TYPES(static_map_contains,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_contains_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/static_map/erase_bench.cu b/benchmarks/hash_table/static_map/erase_bench.cu
new file mode 100644
index 000000000..c6e56eb07
--- /dev/null
+++ b/benchmarks/hash_table/static_map/erase_bench.cu
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark_defaults.hpp>
+#include <benchmark_utils.hpp>
+
+#include <cuco/static_map.cuh>
+#include <cuco/utility/key_generator.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::static_map::erase` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_erase(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const occupancy = state.get_float64_or_default("Occupancy", defaults::OCCUPANCY);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ std::size_t const size = num_keys / occupancy;
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(
+ keys.begin(), keys.end(), pairs.begin(), [] __device__(auto i) { return pair_type(i, {}); });
+
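+ // Perturb the erase keys so that only ~MatchingRate of them are actually present in the map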
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ state.add_element_count(num_keys);
+
+ state.exec(
+ nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+ // static map with erase support
+ cuco::static_map<Key, Value> map{
+ size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}, cuco::erased_key<Key>{-2}};
+ map.insert(pairs.begin(), pairs.end(), {}, {}, launch.get_stream());
+
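+ // Only the bulk erase below is timed; map construction and insertion above are excluded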
+ timer.start();
+ map.erase(keys.begin(), keys.end(), {}, {}, launch.get_stream());
+ timer.stop();
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> static_map_erase(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(static_map_erase,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_erase_unique_occupancy")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
+
+NVBENCH_BENCH_TYPES(static_map_erase,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_erase_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/static_map/find_bench.cu b/benchmarks/hash_table/static_map/find_bench.cu
new file mode 100644
index 000000000..276a35e0b
--- /dev/null
+++ b/benchmarks/hash_table/static_map/find_bench.cu
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark_defaults.hpp>
+#include <benchmark_utils.hpp>
+
+#include <cuco/static_map.cuh>
+#include <cuco/utility/key_generator.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::static_map::find` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_find(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const occupancy = state.get_float64_or_default("Occupancy", defaults::OCCUPANCY);
+ auto const matching_rate = state.get_float64_or_default("MatchingRate", defaults::MATCHING_RATE);
+
+ std::size_t const size = num_keys / occupancy;
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ cuco::static_map<Key, Value> map{size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
+ map.insert(pairs.begin(), pairs.end());
+
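+ // Drop a fraction of the query keys so that only ~MatchingRate of the finds hit keys in the map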
+ gen.dropout(keys.begin(), keys.end(), matching_rate);
+
+ thrust::device_vector<Value> result(num_keys);
+
+ state.add_element_count(num_keys);
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ map.find(keys.begin(), keys.end(), result.begin(), {}, {}, launch.get_stream());
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> static_map_find(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(static_map_find,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_find_unique_occupancy")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
+
+NVBENCH_BENCH_TYPES(static_map_find,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_find_unique_matching_rate")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);
diff --git a/benchmarks/hash_table/static_map/insert_bench.cu b/benchmarks/hash_table/static_map/insert_bench.cu
new file mode 100644
index 000000000..ef997bef8
--- /dev/null
+++ b/benchmarks/hash_table/static_map/insert_bench.cu
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark_defaults.hpp>
+#include <benchmark_utils.hpp>
+
+#include <cuco/static_map.cuh>
+#include <cuco/utility/key_generator.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::static_map::insert` performance
+ */
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_insert(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ using pair_type = cuco::pair<Key, Value>;
+
+ auto const num_keys = state.get_int64_or_default("NumInputs", defaults::N);
+ auto const occupancy = state.get_float64_or_default("Occupancy", defaults::OCCUPANCY);
+
+ std::size_t const size = num_keys / occupancy;
+
+ thrust::device_vector<Key> keys(num_keys);
+
+ key_generator gen;
+ gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+ thrust::device_vector<pair_type> pairs(num_keys);
+ thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
+ return pair_type(key, {});
+ });
+
+ state.add_element_count(num_keys);
+
+ state.exec(
+ nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+ cuco::static_map<Key, Value> map{
+ size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}, {}, launch.get_stream()};
+
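+ // The map is constructed on the benchmark stream; only the bulk insert below is timed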
+ timer.start();
+ map.insert(pairs.begin(), pairs.end(), {}, {}, launch.get_stream());
+ timer.stop();
+ });
+}
+
+template <typename Key, typename Value, typename Dist>
+std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> static_map_insert(
+ nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
+{
+ state.skip("Key should be the same type as Value.");
+}
+
+NVBENCH_BENCH_TYPES(static_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::uniform>))
+ .set_name("static_map_insert_uniform_multiplicity")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_int64_axis("Multiplicity", defaults::MULTIPLICITY_RANGE);
+
+NVBENCH_BENCH_TYPES(static_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::unique>))
+ .set_name("static_map_insert_unique_occupancy")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Occupancy", defaults::OCCUPANCY_RANGE);
+
+NVBENCH_BENCH_TYPES(static_map_insert,
+ NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
+ defaults::VALUE_TYPE_RANGE,
+ nvbench::type_list<distribution::gaussian>))
+ .set_name("static_map_insert_gaussian_skew")
+ .set_type_axes_names({"Key", "Value", "Distribution"})
+ .set_max_noise(defaults::MAX_NOISE)
+ .add_float64_axis("Skew", defaults::SKEW_RANGE);
diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu
deleted file mode 100644
index e2b15b05e..000000000
--- a/benchmarks/hash_table/static_map_bench.cu
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-
-#include
-
-#include
-#include
-
-enum class dist_type { UNIQUE, UNIFORM, GAUSSIAN };
-
-template <dist_type Dist, typename Key, typename OutputIt>
-static void generate_keys(OutputIt output_begin, OutputIt output_end)
-{
- auto num_keys = std::distance(output_begin, output_end);
-
- std::random_device rd;
- std::mt19937 gen{rd()};
-
- switch (Dist) {
- case dist_type::UNIQUE:
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = i;
- }
- break;
- case dist_type::UNIFORM:
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = std::abs(static_cast<Key>(gen()));
- }
- break;
- case dist_type::GAUSSIAN:
- std::normal_distribution<> dg{1e9, 1e7};
- for (auto i = 0; i < num_keys; ++i) {
- output_begin[i] = std::abs(static_cast<Key>(dg(gen)));
- }
- break;
- }
-}
-
-/**
- * @brief Generates input sizes and hash table occupancies
- *
- */
-static void generate_size_and_occupancy(benchmark::internal::Benchmark* b)
-{
- for (auto size = 100'000'000; size <= 100'000'000; size *= 10) {
- for (auto occupancy = 10; occupancy <= 90; occupancy += 10) {
- b->Args({size, occupancy});
- }
- }
-}
-
-template <typename Key, typename Value, dist_type Dist>
-static void BM_static_map_insert(::benchmark::State& state)
-{
- using map_type = cuco::static_map<Key, Value>;
-
- std::size_t num_keys = state.range(0);
- float occupancy = state.range(1) / float{100};
- std::size_t size = num_keys / occupancy;
-
- std::vector<Key> h_keys(num_keys);
- std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
-
- generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
-
- for (std::size_t i = 0; i < num_keys; ++i) {
- Key key = h_keys[i];
- Value val = h_keys[i];
- h_pairs[i].first = key;
- h_pairs[i].second = val;
- }
-
- thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
- thrust::device_vector<Key> d_keys(h_keys);
-
- for (auto _ : state) {
- map_type map{size, cuco::sentinel::empty_key<Key>{-1}, cuco::sentinel::empty_value<Value>{-1}};
-
- cudaEvent_t start, stop;
- cudaEventCreate(&start);
- cudaEventCreate(&stop);
-
- cudaEventRecord(start);
- map.insert(d_pairs.begin(), d_pairs.end());
- cudaEventRecord(stop);
- cudaEventSynchronize(stop);
-
- float ms;
- cudaEventElapsedTime(&ms, start, stop);
-
- state.SetIterationTime(ms / 1000);
- }
-
- state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
- int64_t(state.range(0)));
-}
-
-template <typename Key, typename Value, dist_type Dist>
-static void BM_static_map_search_all(::benchmark::State& state)
-{
- using map_type = cuco::static_map<Key, Value>;
-
- std::size_t num_keys = state.range(0);
- float occupancy = state.range(1) / float{100};
- std::size_t size = num_keys / occupancy;
-
- map_type map{size, cuco::sentinel::empty_key<Key>{-1}, cuco::sentinel::empty_value<Value>{-1}};
-
- std::vector<Key> h_keys(num_keys);
- std::vector<Value> h_values(num_keys);
- std::vector