From a9c923bb3f4a6a6f5a9d46337adc65d969717567 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Tue, 21 Jan 2025 12:11:40 -0800
Subject: [PATCH 1/6] Replace thrust::min/max with cuda::std::min/max (#4871)

cuda::std::min/max is a more C++-standard-conformant way of computing the min/max of two values than thrust::min/max.

We are using cuda::std::min/max in the src directory but thrust::min/max is still used in some files under the test directory.

This PR replaces thrust::min/max calls in the test directory with cuda::std::min/max.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Joseph Nke (https://github.com/jnke2016)

URL: https://github.com/rapidsai/cugraph/pull/4871
---
 cpp/tests/sampling/detail/nbr_sampling_validate.cu | 3 ++-
 cpp/tests/utilities/check_utilities.hpp            | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/cpp/tests/sampling/detail/nbr_sampling_validate.cu b/cpp/tests/sampling/detail/nbr_sampling_validate.cu
index f399b7542eb..f360b6c04d5 100644
--- a/cpp/tests/sampling/detail/nbr_sampling_validate.cu
+++ b/cpp/tests/sampling/detail/nbr_sampling_validate.cu
@@ -28,6 +28,7 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/equal.h>
@@ -275,7 +276,7 @@ bool validate_sampling_depth(raft::handle_t const& handle,
                         tuple_iter + d_distances.size(),
                         d_distances.begin(),
                         [] __device__(auto tuple) {
-                          return thrust::min(thrust::get<0>(tuple), thrust::get<1>(tuple));
+                          return cuda::std::min(thrust::get<0>(tuple), thrust::get<1>(tuple));
                         });
     }
   }
diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp
index 68b4ef88dda..a22d95c87de 100644
--- a/cpp/tests/utilities/check_utilities.hpp
+++ b/cpp/tests/utilities/check_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,8 @@
 #include <raft/core/handle.hpp>
 #include <raft/core/span.hpp>
 
+#include <cuda/std/functional>
+
 #include <numeric>
 #include <type_traits>
 #include <vector>
@@ -95,7 +97,7 @@ struct device_nearly_equal {
   bool __device__ operator()(type_t lhs, type_t rhs) const
   {
     return std::abs(lhs - rhs) <
-           thrust::max(thrust::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+           cuda::std::max(cuda::std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
   }
 };
 

From 9396bbba0f7ccb3ba8c9d4bd538435ce96267669 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 22 Jan 2025 00:35:15 -0600
Subject: [PATCH 2/6] use wildcards in auditwheel exclusions (#4877)

Contributes to https://github.com/rapidsai/build-planning/issues/137

Follow-up to #4804

Wheel builds here currently list specific shared libraries to exclude in `auditwheel repair`; the wheels pick these libraries up transitively by linking to `libraft`.

https://github.com/rapidsai/cugraph/blob/a9c923bb3f4a6a6f5a9d46337adc65d969717567/ci/build_wheel.sh#L42-L49

The version components of those library names can change when those libraries have ABI breakages, for example across CUDA major version boundaries. This proposes replacing specific versions with wildcards, to exclude *all* versions of those libraries.

## Notes for Reviewers

This is especially relevant given this: https://github.com/rapidsai/raft/pull/2548

For example, the latest `nvidia-cublas-cu11` has `libcublas.so.11` while `nvidia-cublas-cu12` has `libcublas.so.12`.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cugraph/pull/4877
---
 ci/build_wheel.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 40a9f85bbc7..6766eaaa244 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -40,12 +40,12 @@ EXCLUDE_ARGS=(
 # 'libraft' wheels are responsible for carrying a runtime dependency on
 # these based on RAFT's needs.
 EXCLUDE_ARGS+=(
-  --exclude "libcublas.so.12"
-  --exclude "libcublasLt.so.12"
-  --exclude "libcurand.so.10"
-  --exclude "libcusolver.so.11"
-  --exclude "libcusparse.so.12"
-  --exclude "libnvJitLink.so.12"
+  --exclude "libcublas.so.*"
+  --exclude "libcublasLt.so.*"
+  --exclude "libcurand.so.*"
+  --exclude "libcusolver.so.*"
+  --exclude "libcusparse.so.*"
+  --exclude "libnvJitLink.so.*"
 )
 
 if [[ "${package_dir}" != "python/libcugraph" ]]; then

From 079f55591fee15273f8094cef9bb84408560ab83 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 22 Jan 2025 00:42:18 -0600
Subject: [PATCH 3/6] Temporarily skip CUDA 11 wheel CI (#4876)

Due to some failures coming from libraft C++ wheels, CUDA 11 wheel CI will not pass. This PR temporarily disables CUDA 11 wheel tests until those issues can be resolved.

See https://github.com/rapidsai/build-planning/issues/137.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cugraph/pull/4876
---
 .github/workflows/pr.yaml   | 6 ++++++
 .github/workflows/test.yaml | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index e48f2e11acd..bdfcd587634 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -174,6 +174,9 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel_pylibcugraph.sh
+      # CUDA 11 wheel CI is disabled until
+      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
+      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
@@ -189,6 +192,9 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph.sh
+      # CUDA 11 wheel CI is disabled until
+      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
+      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   devcontainer:
     secrets: inherit
     needs: telemetry-setup
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 4aa698c987f..f9c3fb95379 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -49,6 +49,9 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_pylibcugraph.sh
+      # CUDA 11 wheel CI is disabled until
+      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
+      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   wheel-tests-cugraph:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
@@ -58,3 +61,6 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_cugraph.sh
+      # CUDA 11 wheel CI is disabled until
+      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
+      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))

From 378c56a2cc8368459c118b594d45ae7deab3b09d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 22 Jan 2025 09:49:50 -0600
Subject: [PATCH 4/6] Add upper bound to prevent usage of numba 0.61.0 (#4878)

Numba 0.61.0 was just released with a couple of breaking changes; this PR is required to unblock CI.

xref: https://github.com/rapidsai/cudf/pull/17777

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cugraph/pull/4878
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 conda/recipes/cugraph-service/meta.yaml          | 4 ++--
 dependencies.yaml                                | 2 +-
 python/cugraph-service/server/pyproject.toml     | 2 +-
 python/cugraph/pyproject.toml                    | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index edfb9ad4906..f7f64a45b20 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -35,7 +35,7 @@ dependencies:
 - networkx>=2.5.1
 - ninja
 - notebook>=0.5.0
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
 - numpy>=1.23,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 67cf6ce8b98..14e53a5c668 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -41,7 +41,7 @@ dependencies:
 - networkx>=2.5.1
 - ninja
 - notebook>=0.5.0
-- numba>=0.57
+- numba>=0.59.1,<0.61.0a0
 - numpy>=1.23,<3.0a0
 - numpydoc
 - ogb
diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml
index 7df7573e2d0..2af3242395c 100644
--- a/conda/recipes/cugraph-service/meta.yaml
+++ b/conda/recipes/cugraph-service/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -62,7 +62,7 @@ outputs:
         - cupy >=12.0.0
         - dask-cuda ={{ minor_version }}
         - dask-cudf ={{ minor_version }}
-        - numba >=0.57
+        - numba >=0.59.1,<0.61.0a0
         - numpy >=1.23,<3.0a0
         - python
         - rapids-dask-dependency ={{ minor_version }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 318374469d1..ba1bc20addb 100755
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -402,7 +402,7 @@ dependencies:
       - output_types: [conda, pyproject]
         packages:
           - &dask rapids-dask-dependency==25.2.*,>=0.0.0a0
-          - &numba numba>=0.57
+          - &numba numba>=0.59.1,<0.61.0a0
           - &numpy numpy>=1.23,<3.0a0
       - output_types: conda
         packages:
diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml
index ec75af55cb3..b265a9c945a 100644
--- a/python/cugraph-service/server/pyproject.toml
+++ b/python/cugraph-service/server/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
     "cupy-cuda11x>=12.0.0",
     "dask-cuda==25.2.*,>=0.0.0a0",
     "dask-cudf==25.2.*,>=0.0.0a0",
-    "numba>=0.57",
+    "numba>=0.59.1,<0.61.0a0",
     "numpy>=1.23,<3.0a0",
     "rapids-dask-dependency==25.2.*,>=0.0.0a0",
     "rmm==25.2.*,>=0.0.0a0",
diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml
index 398e3b6099e..dfe3b085fdf 100644
--- a/python/cugraph/pyproject.toml
+++ b/python/cugraph/pyproject.toml
@@ -30,7 +30,7 @@ dependencies = [
     "dask-cudf==25.2.*,>=0.0.0a0",
     "fsspec[http]>=0.6.0",
     "libcugraph==25.2.*,>=0.0.0a0",
-    "numba>=0.57",
+    "numba>=0.59.1,<0.61.0a0",
     "numpy>=1.23,<3.0a0",
     "pylibcugraph==25.2.*,>=0.0.0a0",
     "pylibraft==25.2.*,>=0.0.0a0",

From 22aa2e13d960acad376906bc7901afb4b8928a4f Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 22 Jan 2025 13:23:55 -0600
Subject: [PATCH 5/6] Revert "Temporarily skip CUDA 11 wheel CI" (#4879)

Reverts rapidsai/cugraph#4876 now that
https://github.com/rapidsai/raft/pull/2548 has landed.
---
 .github/workflows/pr.yaml   | 6 ------
 .github/workflows/test.yaml | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index bdfcd587634..e48f2e11acd 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -174,9 +174,6 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel_pylibcugraph.sh
-      # CUDA 11 wheel CI is disabled until
-      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
-      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
@@ -192,9 +189,6 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph.sh
-      # CUDA 11 wheel CI is disabled until
-      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
-      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   devcontainer:
     secrets: inherit
     needs: telemetry-setup
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f9c3fb95379..4aa698c987f 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -49,9 +49,6 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_pylibcugraph.sh
-      # CUDA 11 wheel CI is disabled until
-      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
-      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))
   wheel-tests-cugraph:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
@@ -61,6 +58,3 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_cugraph.sh
-      # CUDA 11 wheel CI is disabled until
-      # https://github.com/rapidsai/build-planning/issues/137 is resolved.
-      matrix_filter: map(select(.CUDA_VER | startswith("11") | not))

From 285c972d4b65b9758aab74ad4ff0926b0b725311 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Thu, 23 Jan 2025 09:44:06 -0800
Subject: [PATCH 6/6] Fix "cudaErrorInvalidDevice: invalid device ordinal" in
 debug  runs (#4875)

This PR should replace PR #4872.

Louvain currently fails when built in debug mode, with the error message "cudaErrorInvalidDevice: invalid device ordinal".

The cause is a CUDA block size (set to 512) that is too large for debug runs because of resource limitations. This PR reduces the block size from 512 to 256.

Closes #4806
Closes #4873

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Joseph Nke (https://github.com/jnke2016)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: https://github.com/rapidsai/cugraph/pull/4875
---
 cpp/src/prims/detail/per_v_transform_reduce_e.cuh | 6 +++---
 cpp/src/prims/fill_edge_src_dst_property.cuh      | 4 ++--
 cpp/src/prims/vertex_frontier.cuh                 | 5 ++---
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
index 311b16e71ec..c521774a50d 100644
--- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
+++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -69,7 +69,7 @@ namespace cugraph {
 
 namespace detail {
 
-int32_t constexpr per_v_transform_reduce_e_kernel_block_size                        = 512;
+int32_t constexpr per_v_transform_reduce_e_kernel_block_size                        = 256;
 int32_t constexpr per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size = 128;
 
 template <typename Iterator, typename default_t, typename Enable = void>
@@ -1610,7 +1610,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
         edge_partition.major_range_first(),
         handle.get_stream());
       assert((*key_segment_offsets).back() == *((*key_segment_offsets).rbegin() + 1));
-      assert(sorted_uniue_nzd_key_last == sorted_unique_key_first + (*key_segment_offsets).back());
+      assert(sorted_unique_nzd_key_last == sorted_unique_key_first + (*key_segment_offsets).back());
     }
   } else {
     tmp_vertex_value_output_first = vertex_value_output_first;
diff --git a/cpp/src/prims/fill_edge_src_dst_property.cuh b/cpp/src/prims/fill_edge_src_dst_property.cuh
index a36cf332eb4..60f983e5877 100644
--- a/cpp/src/prims/fill_edge_src_dst_property.cuh
+++ b/cpp/src/prims/fill_edge_src_dst_property.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -973,7 +973,7 @@ void fill_edge_minor_property(raft::handle_t const& handle,
     assert(graph_view.local_vertex_partition_range_size() ==
            (GraphViewType::is_storage_transposed
               ? graph_view.local_edge_partition_src_range_size()
-              : graph_view.local_edge_partition_dst_range_sizse()));
+              : graph_view.local_edge_partition_dst_range_size()));
     if constexpr (contains_packed_bool_element) {
       thrust::for_each(handle.get_thrust_policy(),
                        sorted_unique_vertex_first,
diff --git a/cpp/src/prims/vertex_frontier.cuh b/cpp/src/prims/vertex_frontier.cuh
index 6e7d8515beb..629588b09f8 100644
--- a/cpp/src/prims/vertex_frontier.cuh
+++ b/cpp/src/prims/vertex_frontier.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -227,8 +227,7 @@ void retrieve_vertex_list_from_bitmap(
 {
   using vertex_t = typename thrust::iterator_traits<OutputVertexIterator>::value_type;
 
-  assert((comm.get_rank() != root) ||
-         (bitmap.size() >= packed_bool_size(vertex_range_last - vertex_ragne_first)));
+  assert((bitmap.size() >= packed_bool_size(vertex_range_last - vertex_range_first)));
   detail::copy_if_nosync(thrust::make_counting_iterator(vertex_range_first),
                          thrust::make_counting_iterator(vertex_range_last),
                          thrust::make_transform_iterator(