Merge remote-tracking branch 'origin/benchmark_reference' into benchmark_reference
acostadon committed Jan 30, 2025
2 parents b572287 + e263bc5 commit d2a22b8
Showing 98 changed files with 1,087 additions and 905 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,7 +5,7 @@
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04"
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.5-pip/devcontainer.json
@@ -5,7 +5,7 @@
"args": {
"CUDA": "12.5",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
"BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
6 changes: 0 additions & 6 deletions .github/workflows/pr.yaml
@@ -174,9 +174,6 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_pylibcugraph.sh
-# CUDA 11 wheel CI is disabled until
-# https://github.com/rapidsai/build-planning/issues/137 is resolved.
-matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
wheel-build-cugraph:
needs: wheel-build-pylibcugraph
secrets: inherit
@@ -192,9 +189,6 @@
with:
build_type: pull-request
script: ci/test_wheel_cugraph.sh
-# CUDA 11 wheel CI is disabled until
-# https://github.com/rapidsai/build-planning/issues/137 is resolved.
-matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
devcontainer:
secrets: inherit
needs: telemetry-setup
6 changes: 0 additions & 6 deletions .github/workflows/test.yaml
@@ -49,9 +49,6 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_wheel_pylibcugraph.sh
-# CUDA 11 wheel CI is disabled until
-# https://github.com/rapidsai/build-planning/issues/137 is resolved.
-matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
wheel-tests-cugraph:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
@@ -61,6 +58,3 @@
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_wheel_cugraph.sh
-# CUDA 11 wheel CI is disabled until
-# https://github.com/rapidsai/build-planning/issues/137 is resolved.
-matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
3 changes: 1 addition & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -35,7 +35,7 @@ dependencies:
- networkx>=2.5.1
- ninja
- notebook>=0.5.0
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
@@ -70,7 +70,6 @@ dependencies:
- thriftpy2>=0.4.15,!=0.5.0,!=0.5.1
- torchdata
- torchmetrics
-- ucx-proc=*=gpu
- ucx-py==0.42.*,>=0.0.0a0
- wheel
name: all_cuda-118_arch-x86_64
3 changes: 1 addition & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -41,7 +41,7 @@ dependencies:
- networkx>=2.5.1
- ninja
- notebook>=0.5.0
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpydoc
- ogb
@@ -75,7 +75,6 @@ dependencies:
- thriftpy2>=0.4.15,!=0.5.0,!=0.5.1
- torchdata
- torchmetrics
-- ucx-proc=*=gpu
- ucx-py==0.42.*,>=0.0.0a0
- wheel
name: all_cuda-125_arch-x86_64
4 changes: 2 additions & 2 deletions conda/recipes/cugraph-service/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -62,7 +62,7 @@ outputs:
- cupy >=12.0.0
- dask-cuda ={{ minor_version }}
- dask-cudf ={{ minor_version }}
-- numba >=0.57
+- numba >=0.59.1,<0.61.0a0
- numpy >=1.23,<3.0a0
- python
- rapids-dask-dependency ={{ minor_version }}
1 change: 0 additions & 1 deletion conda/recipes/cugraph/meta.yaml
@@ -90,7 +90,6 @@ requirements:
- raft-dask ={{ minor_version }}
- rapids-dask-dependency ={{ minor_version }}
- requests
-- ucx-proc=*=gpu
- ucx-py {{ ucx_py_version }}

tests:
2 changes: 0 additions & 2 deletions conda/recipes/libcugraph/meta.yaml
@@ -72,7 +72,6 @@ requirements:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- nccl {{ nccl_version }}
-- ucx-proc=*=gpu
- rapids-build-backend>=0.3.1,<0.4.0.dev0

outputs:
@@ -113,7 +112,6 @@ outputs:
- libraft ={{ minor_version }}
- librmm ={{ minor_version }}
- nccl {{ nccl_version }}
-- ucx-proc=*=gpu
about:
home: https://rapids.ai/
dev_url: https://github.com/rapidsai/cugraph
3 changes: 1 addition & 2 deletions cpp/include/cugraph/detail/decompress_edge_partition.cuh
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020-2024, NVIDIA CORPORATION.
+* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,7 +29,6 @@
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
#include <thrust/sequence.h>
#include <thrust/tuple.h>

84 changes: 43 additions & 41 deletions cpp/include/cugraph/edge_partition_device_view.cuh
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020-2024, NVIDIA CORPORATION.
+* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,10 +26,10 @@
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

+#include <cuda/std/optional>
#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
-#include <thrust/optional.h>
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>
@@ -43,18 +43,18 @@ namespace cugraph {
namespace detail {

template <typename vertex_t>
-__device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_impl(
+__device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_impl(
raft::device_span<vertex_t const> dcs_nzd_vertices, vertex_t major)
{
// we can avoid binary search (and potentially improve performance) if we add an auxiliary array
// or cuco::static_map (at the expense of additional memory)
auto it =
thrust::lower_bound(thrust::seq, dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major);
return it != dcs_nzd_vertices.end()
-? (*it == major ? thrust::optional<vertex_t>{static_cast<vertex_t>(
+? (*it == major ? cuda::std::optional<vertex_t>{static_cast<vertex_t>(
thrust::distance(dcs_nzd_vertices.begin(), it))}
-: thrust::nullopt)
-: thrust::nullopt;
+: cuda::std::nullopt)
+: cuda::std::nullopt;
}

template <typename vertex_t, typename edge_t, typename return_type_t, bool multi_gpu, bool use_dcs>
@@ -490,7 +490,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
return major_value_start_offset_;
}

-__host__ __device__ thrust::optional<vertex_t> major_hypersparse_first() const noexcept
+__host__ __device__ cuda::std::optional<vertex_t> major_hypersparse_first() const noexcept
{
return major_hypersparse_first_;
}
@@ -528,15 +528,16 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
return major_range_first_ + major_offset;
}

-__device__ thrust::optional<vertex_t> major_idx_from_major_nocheck(vertex_t major) const noexcept
+__device__ cuda::std::optional<vertex_t> major_idx_from_major_nocheck(
+  vertex_t major) const noexcept
{
if (major_hypersparse_first_ && (major >= *major_hypersparse_first_)) {
auto major_hypersparse_idx =
detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major);
return major_hypersparse_idx
-? thrust::make_optional((*major_hypersparse_first_ - major_range_first_) +
-*major_hypersparse_idx)
-: thrust::nullopt;
+? cuda::std::make_optional((*major_hypersparse_first_ - major_range_first_) +
+*major_hypersparse_idx)
+: cuda::std::nullopt;
} else {
return major - major_range_first_;
}
@@ -554,60 +555,60 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

// major_hypersparse_idx: index within the hypersparse segment
-__device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
+__device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
vertex_t major) const noexcept
{
if (dcs_nzd_vertices_) {
return detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major);
} else {
-return thrust::nullopt;
+return cuda::std::nullopt;
}
}

// major_hypersparse_idx: index within the hypersparse segment
-__device__ thrust::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
+__device__ cuda::std::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
vertex_t major_hypersparse_idx) const noexcept
{
return dcs_nzd_vertices_
-? thrust::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
-: thrust::nullopt;
+? cuda::std::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
+: cuda::std::nullopt;
}

__host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
{
return minor_range_first_ + minor_offset;
}

-// FIxME: better return thrust::optional<raft::device_span<vertex_t const>> for consistency (see
-// dcs_nzd_range_bitmap())
-__host__ __device__ thrust::optional<vertex_t const*> dcs_nzd_vertices() const
+// FIxME: better return cuda::std::optional<raft::device_span<vertex_t const>> for consistency
+// (see dcs_nzd_range_bitmap())
+__host__ __device__ cuda::std::optional<vertex_t const*> dcs_nzd_vertices() const
{
-return dcs_nzd_vertices_ ? thrust::optional<vertex_t const*>{(*dcs_nzd_vertices_).data()}
-: thrust::nullopt;
+return dcs_nzd_vertices_ ? cuda::std::optional<vertex_t const*>{(*dcs_nzd_vertices_).data()}
+: cuda::std::nullopt;
}

-__host__ __device__ thrust::optional<vertex_t> dcs_nzd_vertex_count() const
+__host__ __device__ cuda::std::optional<vertex_t> dcs_nzd_vertex_count() const
{
return dcs_nzd_vertices_
-? thrust::optional<vertex_t>{static_cast<vertex_t>((*dcs_nzd_vertices_).size())}
-: thrust::nullopt;
+? cuda::std::optional<vertex_t>{static_cast<vertex_t>((*dcs_nzd_vertices_).size())}
+: cuda::std::nullopt;
}

-__host__ __device__ thrust::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap()
+__host__ __device__ cuda::std::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap()
const
{
return dcs_nzd_range_bitmap_
-? thrust::make_optional<raft::device_span<uint32_t const>>(
+? cuda::std::make_optional<raft::device_span<uint32_t const>>(
(*dcs_nzd_range_bitmap_).data(), (*dcs_nzd_range_bitmap_).size())
-: thrust::nullopt;
+: cuda::std::nullopt;
}

private:
// should be trivially copyable to device

-thrust::optional<raft::device_span<vertex_t const>> dcs_nzd_vertices_{thrust::nullopt};
-thrust::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap_{thrust::nullopt};
-thrust::optional<vertex_t> major_hypersparse_first_{thrust::nullopt};
+cuda::std::optional<raft::device_span<vertex_t const>> dcs_nzd_vertices_{cuda::std::nullopt};
+cuda::std::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap_{cuda::std::nullopt};
+cuda::std::optional<vertex_t> major_hypersparse_first_{cuda::std::nullopt};

vertex_t major_range_first_{0};
vertex_t major_range_last_{0};
@@ -790,10 +791,10 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t

__host__ __device__ vertex_t major_value_start_offset() const { return vertex_t{0}; }

-__host__ __device__ thrust::optional<vertex_t> major_hypersparse_first() const noexcept
+__host__ __device__ cuda::std::optional<vertex_t> major_hypersparse_first() const noexcept
{
assert(false);
-return thrust::nullopt;
+return cuda::std::nullopt;
}

__host__ __device__ constexpr vertex_t major_range_first() const noexcept { return vertex_t{0}; }
@@ -823,7 +824,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
return major_offset;
}

-__device__ thrust::optional<vertex_t> major_idx_from_major_nocheck(vertex_t major) const noexcept
+__device__ cuda::std::optional<vertex_t> major_idx_from_major_nocheck(
+  vertex_t major) const noexcept
{
return major_offset_from_major_nocheck(major);
}
@@ -834,34 +836,34 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

// major_hypersparse_idx: index within the hypersparse segment
-__device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
+__device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
vertex_t major) const noexcept
{
assert(false);
-return thrust::nullopt;
+return cuda::std::nullopt;
}

// major_hypersparse_idx: index within the hypersparse segment
-__device__ thrust::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
+__device__ cuda::std::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
vertex_t major_hypersparse_idx) const noexcept
{
assert(false);
-return thrust::nullopt;
+return cuda::std::nullopt;
}

__host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
{
return minor_offset;
}

-__host__ __device__ thrust::optional<vertex_t const*> dcs_nzd_vertices() const
+__host__ __device__ cuda::std::optional<vertex_t const*> dcs_nzd_vertices() const
{
-return thrust::nullopt;
+return cuda::std::nullopt;
}

-__host__ __device__ thrust::optional<vertex_t> dcs_nzd_vertex_count() const
+__host__ __device__ cuda::std::optional<vertex_t> dcs_nzd_vertex_count() const
{
-return thrust::nullopt;
+return cuda::std::nullopt;
}

private:
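Note on the C++ changes above: the bulk of the diff replaces thrust::optional with cuda::std::optional, libcu++'s host/device implementation of std::optional, which newer CCCL releases favor over the Thrust type. As a minimal sketch of the pattern — mirroring the sequential device-side binary search in major_hypersparse_idx_from_major_nocheck_impl; the function name index_of_nocheck and its raw-pointer signature are illustrative, not part of cugraph:

// Sketch only, assuming nvcc with libcu++ and Thrust available.
// Reports the result of a per-thread sequential binary search through
// cuda::std::optional rather than the older thrust::optional.
#include <cuda/std/optional>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>

template <typename vertex_t>
__device__ cuda::std::optional<vertex_t> index_of_nocheck(vertex_t const* sorted,
                                                          vertex_t count,
                                                          vertex_t key)
{
  // thrust::seq keeps the search sequential within the calling thread.
  auto it = thrust::lower_bound(thrust::seq, sorted, sorted + count, key);
  return (it != sorted + count && *it == key)
           ? cuda::std::make_optional(static_cast<vertex_t>(thrust::distance(sorted, it)))
           : cuda::std::nullopt;
}

Because cuda::std::optional follows the std::optional interface, the surrounding code keeps the same make_optional/nullopt idioms; the migration is essentially a namespace substitution plus swapping #include <thrust/optional.h> for #include <cuda/std/optional>.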
