From fe7852d2ae53bc789d6f844b8dcfefad0b432edd Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 23 Jan 2025 22:26:59 -0600
Subject: [PATCH 1/9] update pip devcontainers to UCX 1.18 (#4890)

Contributes to https://github.com/rapidsai/build-planning/issues/138

Updates the pip devcontainers here to use UCX 1.18.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/cugraph/pull/4890
---
 .devcontainer/cuda11.8-pip/devcontainer.json | 2 +-
 .devcontainer/cuda12.5-pip/devcontainer.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index f2abf493e12..0a6119fc825 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json
index 7024c0f0a22..fe402024e29 100644
--- a/.devcontainer/cuda12.5-pip/devcontainer.json
+++ b/.devcontainer/cuda12.5-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "12.5",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [

From 09f18b4730373c7875d1a375d121198b3752c5dd Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 27 Jan 2025 10:04:59 -0600
Subject: [PATCH 2/9] remove ucx-proc dependency (#4894)

Contributes to https://github.com/rapidsai/build-planning/issues/142

`ucx-proc` has been unnecessary for conda environments since UCX 1.14, and RAPIDS currently supports UCX 1.15+. This proposes removing that dependency from conda packages and environments here.

See the linked issue for more details.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cugraph/pull/4894
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 1 -
 conda/environments/all_cuda-125_arch-x86_64.yaml | 1 -
 conda/recipes/cugraph/meta.yaml                  | 1 -
 conda/recipes/libcugraph/meta.yaml               | 2 --
 dependencies.yaml                                | 1 -
 5 files changed, 6 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index f7f64a45b20..1a7e2b77662 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -70,7 +70,6 @@ dependencies:
 - thriftpy2>=0.4.15,!=0.5.0,!=0.5.1
 - torchdata
 - torchmetrics
-- ucx-proc=*=gpu
 - ucx-py==0.42.*,>=0.0.0a0
 - wheel
 name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 14e53a5c668..83126df9194 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -75,7 +75,6 @@ dependencies:
 - thriftpy2>=0.4.15,!=0.5.0,!=0.5.1
 - torchdata
 - torchmetrics
-- ucx-proc=*=gpu
 - ucx-py==0.42.*,>=0.0.0a0
 - wheel
 name: all_cuda-125_arch-x86_64
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml
index 7bc8b7f06e1..d25c3be881e 100644
--- a/conda/recipes/cugraph/meta.yaml
+++ b/conda/recipes/cugraph/meta.yaml
@@ -90,7 +90,6 @@ requirements:
     - raft-dask ={{ minor_version }}
     - rapids-dask-dependency ={{ minor_version }}
     - requests
-    - ucx-proc=*=gpu
     - ucx-py {{ ucx_py_version }}
 
 tests:
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml
index 0ff163ed737..400e0e5f226 100644
--- a/conda/recipes/libcugraph/meta.yaml
+++ b/conda/recipes/libcugraph/meta.yaml
@@ -72,7 +72,6 @@ requirements:
     - libraft ={{ minor_version }}
     - librmm ={{ minor_version }}
     - nccl {{ nccl_version }}
-    - ucx-proc=*=gpu
     - rapids-build-backend>=0.3.1,<0.4.0.dev0
 
 outputs:
@@ -113,7 +112,6 @@ outputs:
         - libraft ={{ minor_version }}
         - librmm ={{ minor_version }}
         - nccl {{ nccl_version }}
-        - ucx-proc=*=gpu
     about:
       home: https://rapids.ai/
       dev_url: https://github.com/rapidsai/cugraph
diff --git a/dependencies.yaml b/dependencies.yaml
index ba1bc20addb..02fa03cff70 100755
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -410,7 +410,6 @@ dependencies:
           - fsspec>=0.6.0
           - requests
           - nccl>=2.19
-          - ucx-proc=*=gpu
       - output_types: pyproject
         packages:
             # cudf uses fsspec but is protocol independent. cugraph

From b64b04fde6bab4ac1c810269c2101eff0e41cabd Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Tue, 28 Jan 2025 07:28:49 +0100
Subject: [PATCH 3/9] Replace all uses of `thrust::optional` with
 `cuda::std::optional` (#4891)

`thrust::optional` is being deprecated and will be removed in a future version of CCCL, so all uses are replaced with `cuda::std::optional`.

Authors:
  - Michael Schellenberger Costa (https://github.com/miscco)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: https://github.com/rapidsai/cugraph/pull/4891
---
 .../detail/decompress_edge_partition.cuh      |   3 +-
 .../cugraph/edge_partition_device_view.cuh    |  84 ++++++++-------
 ...ge_partition_edge_property_device_view.cuh |   6 +-
 ...artition_endpoint_property_device_view.cuh |  13 +--
 cpp/include/cugraph/edge_property.hpp         |   8 +-
 cpp/include/cugraph/edge_src_dst_property.hpp |  10 +-
 cpp/include/cugraph/utilities/misc_utils.cuh  |  10 +-
 .../betweenness_centrality_impl.cuh           |  19 ++--
 .../approx_weighted_matching_impl.cuh         |  15 +--
 cpp/src/community/detail/common_methods.cuh   |   8 +-
 .../detail/maximal_independent_moves.cuh      |   4 +-
 cpp/src/community/detail/refine_impl.cuh      |   3 +-
 cpp/src/community/ecg_impl.cuh                |   6 +-
 .../community/edge_triangle_count_impl.cuh    |   7 +-
 cpp/src/community/k_truss_impl.cuh            |  43 ++++----
 cpp/src/community/triangle_count_impl.cuh     |  25 ++---
 cpp/src/components/mis_impl.cuh               |   4 +-
 cpp/src/components/vertex_coloring_impl.cuh   |  11 +-
 .../weakly_connected_components_impl.cuh      |  42 ++++----
 cpp/src/cores/core_number_impl.cuh            |  14 +--
 cpp/src/lookup/lookup_src_dst_impl.cuh        |  20 ++--
 .../detail/extract_transform_v_frontier_e.cuh |  44 ++++----
 cpp/src/prims/detail/nbr_intersection.cuh     | 102 +++++++++---------
 cpp/src/prims/detail/partition_v_frontier.cuh |   4 +-
 .../prims/detail/per_v_transform_reduce_e.cuh |  44 ++++----
 .../sample_and_compute_local_nbr_indices.cuh  |  24 ++---
 .../prims/detail/transform_v_frontier_e.cuh   |  18 ++--
 cpp/src/prims/extract_transform_e.cuh         |   8 +-
 ...xtract_transform_v_frontier_outgoing_e.cuh |   8 +-
 cpp/src/prims/fill_edge_property.cuh          |   7 +-
 ..._v_pair_transform_dst_nbr_intersection.cuh |  61 +++++------
 ...r_v_random_select_transform_outgoing_e.cuh |  18 ++--
 ...m_reduce_dst_key_aggregated_outgoing_e.cuh |  57 +++++-----
 cpp/src/prims/transform_e.cuh                 |  23 ++--
 ...t_nbr_intersection_of_e_endpoints_by_v.cuh |   8 +-
 cpp/src/prims/transform_reduce_e.cuh          |  22 ++--
 .../transform_reduce_e_by_src_dst_key.cuh     |  42 ++++----
 ...rm_reduce_v_frontier_outgoing_e_by_dst.cuh |  28 ++---
 cpp/src/prims/update_v_frontier.cuh           |  22 ++--
 .../detail/gather_one_hop_edgelist_impl.cuh   |  46 ++++----
 .../detail/prepare_next_frontier_impl.cuh     |   5 +-
 .../remove_visited_vertices_from_frontier.cuh |   5 +-
 ...sited_vertices_from_frontier_sg_v32_e32.cu |   5 +-
 ...sited_vertices_from_frontier_sg_v64_e64.cu |   5 +-
 cpp/src/sampling/detail/sample_edges.cuh      |  12 +--
 cpp/src/sampling/detail/sampling_utils.hpp    |   4 +-
 .../shuffle_and_organize_output_impl.cuh      |   5 +-
 cpp/src/sampling/neighbor_sampling_impl.hpp   |   7 +-
 cpp/src/sampling/random_walks.cuh             |   8 +-
 cpp/src/sampling/random_walks_impl.cuh        |  34 +++---
 cpp/src/sampling/rw_traversals.hpp            |  33 +++---
 .../sampling_post_processing_impl.cuh         |  65 +++++------
 cpp/src/structure/graph_view_impl.cuh         |  27 ++---
 cpp/src/structure/induced_subgraph_impl.cuh   |  27 ++---
 cpp/src/traversal/bfs_impl.cuh                |  31 ++++--
 cpp/src/traversal/k_hop_nbrs_impl.cuh         |  14 +--
 .../traversal/od_shortest_distances_impl.cuh  |  20 ++--
 cpp/src/traversal/sssp_impl.cuh               |  28 ++---
 .../components/mg_vertex_coloring_test.cu     |   8 +-
 cpp/tests/components/vertex_coloring_test.cu  |   8 +-
 cpp/tests/prims/mg_count_if_e.cu              |  25 ++---
 cpp/tests/prims/mg_extract_transform_e.cu     |  14 +--
 ...extract_transform_v_frontier_outgoing_e.cu |  10 +-
 ...er_v_random_select_transform_outgoing_e.cu |  30 +++---
 ...rm_reduce_dst_key_aggregated_outgoing_e.cu |  16 +--
 ..._v_transform_reduce_incoming_outgoing_e.cu |   6 +-
 cpp/tests/prims/mg_transform_e.cu             |  10 +-
 cpp/tests/prims/mg_transform_reduce_e.cu      |   9 +-
 .../mg_transform_reduce_e_by_src_dst_key.cu   |  14 +--
 ...orm_reduce_v_frontier_outgoing_e_by_dst.cu |  20 ++--
 cpp/tests/prims/result_compare.cuh            |  27 ++---
 .../sampling_post_processing_validate.cu      |  48 +++++----
 .../utilities/property_generator_kernels.cuh  |  11 +-
 73 files changed, 798 insertions(+), 734 deletions(-)

diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh
index c94f456f215..b31624da840 100644
--- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh
+++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,7 +29,6 @@
 #include <thrust/fill.h>
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sequence.h>
 #include <thrust/tuple.h>
 
diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh
index 628c3cc10cc..b887309bf6f 100644
--- a/cpp/include/cugraph/edge_partition_device_view.cuh
+++ b/cpp/include/cugraph/edge_partition_device_view.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,10 +26,10 @@
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/transform_reduce.h>
 #include <thrust/tuple.h>
@@ -43,7 +43,7 @@ namespace cugraph {
 namespace detail {
 
 template <typename vertex_t>
-__device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_impl(
+__device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_impl(
   raft::device_span<vertex_t const> dcs_nzd_vertices, vertex_t major)
 {
   // we can avoid binary search (and potentially improve performance) if we add an auxiliary array
@@ -51,10 +51,10 @@ __device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_i
   auto it =
     thrust::lower_bound(thrust::seq, dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major);
   return it != dcs_nzd_vertices.end()
-           ? (*it == major ? thrust::optional<vertex_t>{static_cast<vertex_t>(
+           ? (*it == major ? cuda::std::optional<vertex_t>{static_cast<vertex_t>(
                                thrust::distance(dcs_nzd_vertices.begin(), it))}
-                           : thrust::nullopt)
-           : thrust::nullopt;
+                           : cuda::std::nullopt)
+           : cuda::std::nullopt;
 }
 
 template <typename vertex_t, typename edge_t, typename return_type_t, bool multi_gpu, bool use_dcs>
@@ -490,7 +490,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
     return major_value_start_offset_;
   }
 
-  __host__ __device__ thrust::optional<vertex_t> major_hypersparse_first() const noexcept
+  __host__ __device__ cuda::std::optional<vertex_t> major_hypersparse_first() const noexcept
   {
     return major_hypersparse_first_;
   }
@@ -528,15 +528,16 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
     return major_range_first_ + major_offset;
   }
 
-  __device__ thrust::optional<vertex_t> major_idx_from_major_nocheck(vertex_t major) const noexcept
+  __device__ cuda::std::optional<vertex_t> major_idx_from_major_nocheck(
+    vertex_t major) const noexcept
   {
     if (major_hypersparse_first_ && (major >= *major_hypersparse_first_)) {
       auto major_hypersparse_idx =
         detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major);
       return major_hypersparse_idx
-               ? thrust::make_optional((*major_hypersparse_first_ - major_range_first_) +
-                                       *major_hypersparse_idx)
-               : thrust::nullopt;
+               ? cuda::std::make_optional((*major_hypersparse_first_ - major_range_first_) +
+                                          *major_hypersparse_idx)
+               : cuda::std::nullopt;
     } else {
       return major - major_range_first_;
     }
@@ -554,23 +555,23 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
   }
 
   // major_hypersparse_idx: index within the hypersparse segment
-  __device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
+  __device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
     vertex_t major) const noexcept
   {
     if (dcs_nzd_vertices_) {
       return detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major);
     } else {
-      return thrust::nullopt;
+      return cuda::std::nullopt;
     }
   }
 
   // major_hypersparse_idx: index within the hypersparse segment
-  __device__ thrust::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
+  __device__ cuda::std::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
     vertex_t major_hypersparse_idx) const noexcept
   {
     return dcs_nzd_vertices_
-             ? thrust::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
-             : thrust::nullopt;
+             ? cuda::std::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
+             : cuda::std::nullopt;
   }
 
   __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
@@ -578,36 +579,36 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
     return minor_range_first_ + minor_offset;
   }
 
-  // FIxME: better return thrust::optional<raft::device_span<vertex_t const>> for consistency (see
-  // dcs_nzd_range_bitmap())
-  __host__ __device__ thrust::optional<vertex_t const*> dcs_nzd_vertices() const
+  // FIXME: better return cuda::std::optional<raft::device_span<vertex_t const>> for consistency
+  // (see dcs_nzd_range_bitmap())
+  __host__ __device__ cuda::std::optional<vertex_t const*> dcs_nzd_vertices() const
   {
-    return dcs_nzd_vertices_ ? thrust::optional<vertex_t const*>{(*dcs_nzd_vertices_).data()}
-                             : thrust::nullopt;
+    return dcs_nzd_vertices_ ? cuda::std::optional<vertex_t const*>{(*dcs_nzd_vertices_).data()}
+                             : cuda::std::nullopt;
   }
 
-  __host__ __device__ thrust::optional<vertex_t> dcs_nzd_vertex_count() const
+  __host__ __device__ cuda::std::optional<vertex_t> dcs_nzd_vertex_count() const
   {
     return dcs_nzd_vertices_
-             ? thrust::optional<vertex_t>{static_cast<vertex_t>((*dcs_nzd_vertices_).size())}
-             : thrust::nullopt;
+             ? cuda::std::optional<vertex_t>{static_cast<vertex_t>((*dcs_nzd_vertices_).size())}
+             : cuda::std::nullopt;
   }
 
-  __host__ __device__ thrust::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap()
+  __host__ __device__ cuda::std::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap()
     const
   {
     return dcs_nzd_range_bitmap_
-             ? thrust::make_optional<raft::device_span<uint32_t const>>(
+             ? cuda::std::make_optional<raft::device_span<uint32_t const>>(
                  (*dcs_nzd_range_bitmap_).data(), (*dcs_nzd_range_bitmap_).size())
-             : thrust::nullopt;
+             : cuda::std::nullopt;
   }
 
  private:
   // should be trivially copyable to device
 
-  thrust::optional<raft::device_span<vertex_t const>> dcs_nzd_vertices_{thrust::nullopt};
-  thrust::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap_{thrust::nullopt};
-  thrust::optional<vertex_t> major_hypersparse_first_{thrust::nullopt};
+  cuda::std::optional<raft::device_span<vertex_t const>> dcs_nzd_vertices_{cuda::std::nullopt};
+  cuda::std::optional<raft::device_span<uint32_t const>> dcs_nzd_range_bitmap_{cuda::std::nullopt};
+  cuda::std::optional<vertex_t> major_hypersparse_first_{cuda::std::nullopt};
 
   vertex_t major_range_first_{0};
   vertex_t major_range_last_{0};
@@ -790,10 +791,10 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
 
   __host__ __device__ vertex_t major_value_start_offset() const { return vertex_t{0}; }
 
-  __host__ __device__ thrust::optional<vertex_t> major_hypersparse_first() const noexcept
+  __host__ __device__ cuda::std::optional<vertex_t> major_hypersparse_first() const noexcept
   {
     assert(false);
-    return thrust::nullopt;
+    return cuda::std::nullopt;
   }
 
   __host__ __device__ constexpr vertex_t major_range_first() const noexcept { return vertex_t{0}; }
@@ -823,7 +824,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
     return major_offset;
   }
 
-  __device__ thrust::optional<vertex_t> major_idx_from_major_nocheck(vertex_t major) const noexcept
+  __device__ cuda::std::optional<vertex_t> major_idx_from_major_nocheck(
+    vertex_t major) const noexcept
   {
     return major_offset_from_major_nocheck(major);
   }
@@ -834,19 +836,19 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
   }
 
   // major_hypersparse_idx: index within the hypersparse segment
-  __device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
+  __device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nocheck(
     vertex_t major) const noexcept
   {
     assert(false);
-    return thrust::nullopt;
+    return cuda::std::nullopt;
   }
 
   // major_hypersparse_idx: index within the hypersparse segment
-  __device__ thrust::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
+  __device__ cuda::std::optional<vertex_t> major_from_major_hypersparse_idx_nocheck(
     vertex_t major_hypersparse_idx) const noexcept
   {
     assert(false);
-    return thrust::nullopt;
+    return cuda::std::nullopt;
   }
 
   __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
@@ -854,14 +856,14 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
     return minor_offset;
   }
 
-  __host__ __device__ thrust::optional<vertex_t const*> dcs_nzd_vertices() const
+  __host__ __device__ cuda::std::optional<vertex_t const*> dcs_nzd_vertices() const
   {
-    return thrust::nullopt;
+    return cuda::std::nullopt;
   }
 
-  __host__ __device__ thrust::optional<vertex_t> dcs_nzd_vertex_count() const
+  __host__ __device__ cuda::std::optional<vertex_t> dcs_nzd_vertex_count() const
   {
-    return thrust::nullopt;
+    return cuda::std::nullopt;
   }
 
  private:
diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh
index 4b324bcf348..061c4108f4d 100644
--- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh
+++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh
@@ -21,8 +21,8 @@
 #include <cugraph/utilities/packed_bool_utils.hpp>
 #include <cugraph/utilities/thrust_tuple_utils.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/iterator/iterator_traits.h>
-#include <thrust/optional.h>
 
 namespace cugraph {
 
@@ -182,7 +182,7 @@ template <typename edge_t>
 class edge_partition_edge_dummy_property_device_view_t {
  public:
   using edge_type  = edge_t;
-  using value_type = thrust::nullopt_t;
+  using value_type = cuda::std::nullopt_t;
 
   static constexpr bool is_packed_bool          = false;
   static constexpr bool has_packed_bool_element = false;
@@ -194,7 +194,7 @@ class edge_partition_edge_dummy_property_device_view_t {
   {
   }
 
-  __device__ auto get(edge_t offset) const { return thrust::nullopt; }
+  __device__ auto get(edge_t offset) const { return cuda::std::nullopt; }
 };
 
 }  // namespace detail
diff --git a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh
index f86675e5572..e5600848db6 100644
--- a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh
+++ b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh
@@ -23,12 +23,12 @@
 
 #include <raft/core/device_span.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
 #include <thrust/fill.h>
 #include <thrust/iterator/iterator_traits.h>
-#include <thrust/optional.h>
 
 namespace cugraph {
 
@@ -184,9 +184,10 @@ class edge_partition_endpoint_property_device_view_t {
   }
 
  private:
-  thrust::optional<raft::device_span<vertex_t const>> keys_{thrust::nullopt};
-  thrust::optional<raft::device_span<vertex_t const>> key_chunk_start_offsets_{thrust::nullopt};
-  thrust::optional<size_t> key_chunk_size_{thrust::nullopt};
+  cuda::std::optional<raft::device_span<vertex_t const>> keys_{cuda::std::nullopt};
+  cuda::std::optional<raft::device_span<vertex_t const>> key_chunk_start_offsets_{
+    cuda::std::nullopt};
+  cuda::std::optional<size_t> key_chunk_size_{cuda::std::nullopt};
 
   ValueIterator value_first_{};
   vertex_t range_first_{};
@@ -214,7 +215,7 @@ template <typename vertex_t>
 class edge_partition_endpoint_dummy_property_device_view_t {
  public:
   using vertex_type                             = vertex_t;
-  using value_type                              = thrust::nullopt_t;
+  using value_type                              = cuda::std::nullopt_t;
   static constexpr bool is_packed_bool          = false;
   static constexpr bool has_packed_bool_element = false;
 
@@ -230,7 +231,7 @@ class edge_partition_endpoint_dummy_property_device_view_t {
   {
   }
 
-  __device__ auto get(vertex_t offset) const { return thrust::nullopt; }
+  __device__ auto get(vertex_t offset) const { return cuda::std::nullopt; }
 };
 
 }  // namespace detail
diff --git a/cpp/include/cugraph/edge_property.hpp b/cpp/include/cugraph/edge_property.hpp
index d46d4e52fd4..11041f504f3 100644
--- a/cpp/include/cugraph/edge_property.hpp
+++ b/cpp/include/cugraph/edge_property.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,8 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/iterator/iterator_traits.h>
-#include <thrust/optional.h>
 
 #include <optional>
 #include <type_traits>
@@ -63,7 +63,7 @@ class edge_property_view_t {
 
 class edge_dummy_property_view_t {
  public:
-  using value_type     = thrust::nullopt_t;
+  using value_type     = cuda::std::nullopt_t;
   using value_iterator = void*;
 };
 
@@ -155,7 +155,7 @@ class edge_property_t {
 
 class edge_dummy_property_t {
  public:
-  using value_type = thrust::nullopt_t;
+  using value_type = cuda::std::nullopt_t;
 
   auto view() const { return edge_dummy_property_view_t{}; }
 };
diff --git a/cpp/include/cugraph/edge_src_dst_property.hpp b/cpp/include/cugraph/edge_src_dst_property.hpp
index d27f6856428..f7096ce32fa 100644
--- a/cpp/include/cugraph/edge_src_dst_property.hpp
+++ b/cpp/include/cugraph/edge_src_dst_property.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 #include <raft/core/handle.hpp>
 #include <raft/core/host_span.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/iterator/iterator_traits.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <optional>
@@ -365,7 +365,7 @@ class edge_minor_property_t {
 
 class edge_endpoint_dummy_property_view_t {
  public:
-  using value_type     = thrust::nullopt_t;
+  using value_type     = cuda::std::nullopt_t;
   using value_iterator = void*;
 };
 
@@ -557,14 +557,14 @@ class edge_dst_property_t {
 
 class edge_src_dummy_property_t {
  public:
-  using value_type = thrust::nullopt_t;
+  using value_type = cuda::std::nullopt_t;
 
   auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; }
 };
 
 class edge_dst_dummy_property_t {
  public:
-  using value_type = thrust::nullopt_t;
+  using value_type = cuda::std::nullopt_t;
 
   auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; }
 };
diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh
index 91a349007da..b25dd9a41f4 100644
--- a/cpp/include/cugraph/utilities/misc_utils.cuh
+++ b/cpp/include/cugraph/utilities/misc_utils.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,11 +23,11 @@
 
 #include <cuda/atomic>
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/gather.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
-#include <thrust/optional.h>
 
 #include <optional>
 #include <tuple>
@@ -87,15 +87,15 @@ std::tuple<std::vector<vertex_t>, std::vector<offset_t>> compute_offset_aligned_
 }
 
 template <typename T>
-thrust::optional<T> to_thrust_optional(std::optional<T> val)
+cuda::std::optional<T> to_thrust_optional(std::optional<T> val)
 {
-  thrust::optional<T> ret{thrust::nullopt};
+  cuda::std::optional<T> ret{cuda::std::nullopt};
   if (val) { ret = *val; }
   return ret;
 }
 
 template <typename T>
-std::optional<T> to_std_optional(thrust::optional<T> val)
+std::optional<T> to_std_optional(cuda::std::optional<T> val)
 {
   std::optional<T> ret{std::nullopt};
   if (val) { ret = *val; }
diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh
index 88ef3987a03..4eb57b621ea 100644
--- a/cpp/src/centrality/betweenness_centrality_impl.cuh
+++ b/cpp/src/centrality/betweenness_centrality_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,8 +36,8 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/functional.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 
 //
@@ -52,10 +52,11 @@ struct brandes_e_op_t {
   const vertex_t invalid_distance_{std::numeric_limits<vertex_t>::max()};
 
   template <typename value_t, typename ignore_t>
-  __device__ thrust::optional<value_t> operator()(
+  __device__ cuda::std::optional<value_t> operator()(
     vertex_t, vertex_t, value_t src_sigma, vertex_t dst_distance, ignore_t) const
   {
-    return (dst_distance == invalid_distance_) ? thrust::make_optional(src_sigma) : thrust::nullopt;
+    return (dst_distance == invalid_distance_) ? cuda::std::make_optional(src_sigma)
+                                               : cuda::std::nullopt;
   }
 };
 
@@ -64,7 +65,7 @@ struct extract_edge_e_op_t {
   vertex_t d{};
 
   template <typename edge_t, typename weight_t>
-  __device__ thrust::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
     vertex_t src,
     vertex_t dst,
     thrust::tuple<vertex_t, edge_t, weight_t> src_props,
@@ -72,8 +73,8 @@ struct extract_edge_e_op_t {
     weight_t edge_centrality) const
   {
     return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1)))
-             ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
-             : thrust::nullopt;
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
+             : cuda::std::nullopt;
   }
 };
 
@@ -153,8 +154,8 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<edge_t>> brandes_b
                       thrust::make_zip_iterator(distances.begin(), sigmas.begin()),
                       [hop] __device__(auto v, auto old_values, auto v_sigma) {
                         return thrust::make_tuple(
-                          thrust::make_optional(bucket_idx_next),
-                          thrust::make_optional(thrust::make_tuple(hop + 1, v_sigma)));
+                          cuda::std::make_optional(bucket_idx_next),
+                          cuda::std::make_optional(thrust::make_tuple(hop + 1, v_sigma)));
                       });
 
     vertex_frontier.bucket(bucket_idx_cur).clear();
diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh
index 869ed4e7ae6..57bf9d50b9a 100644
--- a/cpp/src/community/approx_weighted_matching_impl.cuh
+++ b/cpp/src/community/approx_weighted_matching_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,7 +66,8 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
     cugraph::edge_src_dummy_property_t{}.view(),
     cugraph::edge_dst_dummy_property_t{}.view(),
     cugraph::edge_dummy_property_t{}.view(),
-    [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) {
+    [] __device__(
+      auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) {
       return !(src == dst);
     },
     edge_masks_even.mutable_view());
@@ -130,7 +131,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
         graph_view_t::is_multi_gpu
           ? src_key_cache.view()
           : detail::edge_major_property_view_t<vertex_t, vertex_t const*>(local_vertices.begin()),
-        [] __device__(auto, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wt) {
+        [] __device__(auto, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto wt) {
           return thrust::make_tuple(wt, dst);
         },
         thrust::make_tuple(weight_t{0.0}, invalid_partner),
@@ -314,7 +315,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
           dst_match_flags.view(),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) {
+            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_odd.mutable_view());
@@ -327,7 +328,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
                                                                     vertex_t{0}),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) {
+            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_odd.mutable_view());
@@ -346,7 +347,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
           dst_match_flags.view(),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) {
+            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_even.mutable_view());
@@ -359,7 +360,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
                                                                     vertex_t{0}),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) {
+            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_even.mutable_view());
diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
index 18fb3fdb251..d37a8864e68 100644
--- a/cpp/src/community/detail/common_methods.cuh
+++ b/cpp/src/community/detail/common_methods.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,11 +30,11 @@
 #include <cugraph/graph_functions.hpp>
 
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
@@ -140,7 +140,7 @@ struct cluster_update_op_t {
 template <typename vertex_t, typename weight_t>
 struct return_edge_weight_t {
   __device__ auto operator()(
-    vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, weight_t w) const
+    vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, weight_t w) const
   {
     return w;
   }
@@ -150,7 +150,7 @@ struct return_edge_weight_t {
 template <typename vertex_t, typename weight_t>
 struct return_one_t {
   __device__ auto operator()(
-    vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+    vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const
   {
     return 1.0;
   }
diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh
index 85892f711ba..70b812d687a 100644
--- a/cpp/src/community/detail/maximal_independent_moves.cuh
+++ b/cpp/src/community/detail/maximal_independent_moves.cuh
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,13 +32,13 @@
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/merge.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/set_operations.h>
 #include <thrust/transform.h>
 #include <thrust/transform_reduce.h>
 
 #include <cmath>
+#include <optional>
 
 namespace cugraph {
 
diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh
index 01a68a3a0d7..6038fcc6f27 100644
--- a/cpp/src/community/detail/refine_impl.cuh
+++ b/cpp/src/community/detail/refine_impl.cuh
@@ -38,7 +38,6 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/random.h>
 #include <thrust/sequence.h>
 #include <thrust/shuffle.h>
@@ -48,6 +47,8 @@
 #include <thrust/transform_reduce.h>
 #include <thrust/tuple.h>
 
+#include <optional>
+
 CUCO_DECLARE_BITWISE_COMPARABLE(float)
 CUCO_DECLARE_BITWISE_COMPARABLE(double)
 // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched.
diff --git a/cpp/src/community/ecg_impl.cuh b/cpp/src/community/ecg_impl.cuh
index d01b13f0b35..100efdb025d 100644
--- a/cpp/src/community/ecg_impl.cuh
+++ b/cpp/src/community/ecg_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,6 +31,8 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
+
 namespace cugraph {
 
 namespace detail {
@@ -106,7 +108,7 @@ std::tuple<rmm::device_uvector<vertex_t>, size_t, weight_t> ecg(
     edge_dst_dummy_property_t{}.view(),
     view_concat(*edge_weight_view, modified_edge_weights.view()),
     [min_weight, ensemble_size = static_cast<weight_t>(ensemble_size)] __device__(
-      auto, auto, thrust::nullopt_t, thrust::nullopt_t, auto edge_properties) {
+      auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, auto edge_properties) {
       auto e_weight    = thrust::get<0>(edge_properties);
       auto e_frequency = thrust::get<1>(edge_properties);
       return min_weight + (e_weight - min_weight) * e_frequency / ensemble_size;
diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh
index fbf47615dbe..b210bcacf35 100644
--- a/cpp/src/community/edge_triangle_count_impl.cuh
+++ b/cpp/src/community/edge_triangle_count_impl.cuh
@@ -28,6 +28,7 @@
 
 #include <raft/util/integer_utils.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/adjacent_difference.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/sort.h>
@@ -353,9 +354,9 @@ edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_t
      num_edges     = edgelist_srcs.size(),
      num_triangles = num_triangles.data()] __device__(auto src,
                                                       auto dst,
-                                                      thrust::nullopt_t,
-                                                      thrust::nullopt_t,
-                                                      thrust::nullopt_t) {
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t) {
       auto pair = thrust::make_tuple(src, dst);
 
       // Find its position in 'edges'
diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh
index 2b712a6de77..bd497b9c58c 100644
--- a/cpp/src/community/k_truss_impl.cuh
+++ b/cpp/src/community/k_truss_impl.cuh
@@ -31,12 +31,12 @@
 
 #include <raft/util/integer_utils.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
 #include <thrust/iterator/transform_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
@@ -47,47 +47,52 @@ namespace {
 
 template <typename vertex_t>
 struct exclude_self_loop_t {
-  __device__ thrust::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
-    vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+    vertex_t src,
+    vertex_t dst,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t) const
   {
     return src != dst
-             ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
-             : thrust::nullopt;
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
+             : cuda::std::nullopt;
   }
 };
 
 template <typename vertex_t, typename weight_t, typename edge_t>
 struct extract_low_to_high_degree_weighted_edges_t {
-  __device__ thrust::optional<thrust::tuple<vertex_t, vertex_t, weight_t>> operator()(
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t, weight_t>> operator()(
     vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const
   {
     return (src_out_degree < dst_out_degree)
-             ? thrust::optional<thrust::tuple<vertex_t, vertex_t, weight_t>>{thrust::make_tuple(
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t, weight_t>>{thrust::make_tuple(
                  src, dst, wgt)}
              : (((src_out_degree == dst_out_degree) &&
                  (src < dst) /* tie-breaking using vertex ID */)
-                  ? thrust::optional<
+                  ? cuda::std::optional<
                       thrust::tuple<vertex_t, vertex_t, weight_t>>{thrust::make_tuple(
                       src, dst, wgt)}
-                  : thrust::nullopt);
+                  : cuda::std::nullopt);
   }
 };
 
 template <typename vertex_t, typename edge_t>
 struct extract_low_to_high_degree_edges_t {
-  __device__ thrust::optional<thrust::tuple<vertex_t, vertex_t>> operator()(vertex_t src,
-                                                                            vertex_t dst,
-                                                                            edge_t src_out_degree,
-                                                                            edge_t dst_out_degree,
-                                                                            thrust::nullopt_t) const
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+    vertex_t src,
+    vertex_t dst,
+    edge_t src_out_degree,
+    edge_t dst_out_degree,
+    cuda::std::nullopt_t) const
   {
     return (src_out_degree < dst_out_degree)
-             ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
              : (((src_out_degree == dst_out_degree) &&
                  (src < dst) /* tie-breaking using vertex ID */)
-                  ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src,
-                                                                                           dst)}
-                  : thrust::nullopt);
+                  ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src,
+                                                                                              dst)}
+                  : cuda::std::nullopt);
   }
 };
 
@@ -343,7 +348,7 @@ k_truss(raft::handle_t const& handle,
         cugraph::edge_src_dummy_property_t{}.view(),
         cugraph::edge_dst_dummy_property_t{}.view(),
         edge_triangle_counts.view(),
-        [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) {
+        [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) {
           return count >= k - 2;
         },
         edge_mask.mutable_view(),
diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh
index 100451f06f3..6718d08cdd7 100644
--- a/cpp/src/community/triangle_count_impl.cuh
+++ b/cpp/src/community/triangle_count_impl.cuh
@@ -27,6 +27,7 @@
 #include <cugraph/utilities/error.hpp>
 #include <cugraph/utilities/host_scalar_comm.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -34,7 +35,6 @@
 #include <thrust/execution_policy.h>
 #include <thrust/fill.h>
 #include <thrust/iterator/transform_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/scatter.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
@@ -64,19 +64,20 @@ struct is_two_or_greater_t {
 
 template <typename vertex_t, typename edge_t>
 struct extract_low_to_high_degree_edges_t {
-  __device__ thrust::optional<thrust::tuple<vertex_t, vertex_t>> operator()(vertex_t src,
-                                                                            vertex_t dst,
-                                                                            edge_t src_out_degree,
-                                                                            edge_t dst_out_degree,
-                                                                            thrust::nullopt_t) const
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+    vertex_t src,
+    vertex_t dst,
+    edge_t src_out_degree,
+    edge_t dst_out_degree,
+    cuda::std::nullopt_t) const
   {
     return (src_out_degree < dst_out_degree)
-             ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
              : (((src_out_degree == dst_out_degree) &&
                  (src < dst) /* tie-breaking using vertex ID */)
-                  ? thrust::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src,
-                                                                                           dst)}
-                  : thrust::nullopt);
+                  ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src,
+                                                                                              dst)}
+                  : cuda::std::nullopt);
   }
 };
 
@@ -85,8 +86,8 @@ struct intersection_op_t {
   __device__ thrust::tuple<edge_t, edge_t, edge_t> operator()(
     vertex_t,
     vertex_t,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
     raft::device_span<vertex_t const> intersection) const
   {
     return thrust::make_tuple(static_cast<edge_t>(intersection.size()),
diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh
index b593c639946..2714d7e3d63 100644
--- a/cpp/src/components/mis_impl.cuh
+++ b/cpp/src/components/mis_impl.cuh
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,13 +32,13 @@
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/merge.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/set_operations.h>
 #include <thrust/transform.h>
 #include <thrust/transform_reduce.h>
 
 #include <cmath>
+#include <optional>
 
 namespace cugraph {
 
diff --git a/cpp/src/components/vertex_coloring_impl.cuh b/cpp/src/components/vertex_coloring_impl.cuh
index fa7fb1f6099..7ad06d12027 100644
--- a/cpp/src/components/vertex_coloring_impl.cuh
+++ b/cpp/src/components/vertex_coloring_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,8 @@
 #include <raft/core/handle.hpp>
 #include <raft/random/rng_state.hpp>
 
+#include <cuda/std/optional>
+
 namespace cugraph {
 
 namespace detail {
@@ -52,7 +54,8 @@ rmm::device_uvector<vertex_t> vertex_coloring(
     edge_src_dummy_property_t{}.view(),
     edge_dst_dummy_property_t{}.view(),
     cugraph::edge_dummy_property_t{}.view(),
-    [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) {
+    [] __device__(
+      auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) {
       return !(src == dst);  // mask out self-loop
     },
     edge_masks_even.mutable_view());
@@ -119,7 +122,7 @@ rmm::device_uvector<vertex_t> vertex_coloring(
                                        is_vertex_in_mis.begin(), vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
         [color_id] __device__(
-          auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) {
+          auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) {
           return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true}));
         },
         edge_masks_odd.mutable_view());
@@ -140,7 +143,7 @@ rmm::device_uvector<vertex_t> vertex_coloring(
                                        is_vertex_in_mis.begin(), vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
         [color_id] __device__(
-          auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) {
+          auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) {
           return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true}));
         },
         edge_masks_even.mutable_view());
diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh
index e791f4dcad3..46db347e0bc 100644
--- a/cpp/src/components/weakly_connected_components_impl.cuh
+++ b/cpp/src/components/weakly_connected_components_impl.cuh
@@ -35,6 +35,7 @@
 #include <rmm/device_uvector.hpp>
 
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
@@ -45,7 +46,6 @@
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/merge.h>
-#include <thrust/optional.h>
 #include <thrust/partition.h>
 #include <thrust/random.h>
 #include <thrust/scan.h>
@@ -189,11 +189,11 @@ struct e_op_t {
   EdgeIterator edge_buffer_first{};
   size_t* num_edge_inserts{};
 
-  __device__ thrust::optional<vertex_t> operator()(thrust::tuple<vertex_t, vertex_t> tagged_src,
-                                                   vertex_t dst,
-                                                   thrust::nullopt_t,
-                                                   thrust::nullopt_t,
-                                                   thrust::nullopt_t) const
+  __device__ cuda::std::optional<vertex_t> operator()(thrust::tuple<vertex_t, vertex_t> tagged_src,
+                                                      vertex_t dst,
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t) const
   {
     auto tag        = thrust::get<1>(tagged_src);
     auto dst_offset = dst - dst_first;
@@ -207,8 +207,8 @@ struct e_op_t {
       *(edge_buffer_first + edge_idx) =
         tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag);
     }
-    return old == invalid_component_id<vertex_t>::value ? thrust::optional<vertex_t>{tag}
-                                                        : thrust::nullopt;
+    return old == invalid_component_id<vertex_t>::value ? cuda::std::optional<vertex_t>{tag}
+                                                        : cuda::std::nullopt;
   }
 };
 
@@ -231,9 +231,10 @@ struct v_op_t {
   size_t bucket_idx_conflict{};  // relevant only if GraphViewType::is_multi_gpu is true
 
   template <bool multi_gpu = GraphViewType::is_multi_gpu>
-  __device__ std::enable_if_t<multi_gpu,
-                              thrust::tuple<thrust::optional<size_t>, thrust::optional<std::byte>>>
-  operator()(thrust::tuple<vertex_type, vertex_type> tagged_v, int /* v_val */) const
+  __device__
+    std::enable_if_t<multi_gpu,
+                     thrust::tuple<cuda::std::optional<size_t>, cuda::std::optional<std::byte>>>
+    operator()(thrust::tuple<vertex_type, vertex_type> tagged_v, int /* v_val */) const
   {
     auto tag = thrust::get<1>(tagged_v);
     auto v_offset =
@@ -242,22 +243,23 @@ struct v_op_t {
     auto old     = invalid_component_id<vertex_type>::value;
     bool success = v_component.compare_exchange_strong(old, tag, cuda::std::memory_order_relaxed);
     if (!success && (old != tag)) {  // conflict
-      return thrust::make_tuple(thrust::optional<size_t>{bucket_idx_conflict},
-                                thrust::optional<std::byte>{std::byte{0}} /* dummy */);
+      return thrust::make_tuple(cuda::std::optional<size_t>{bucket_idx_conflict},
+                                cuda::std::optional<std::byte>{std::byte{0}} /* dummy */);
     } else {
       return thrust::make_tuple(
-        success ? thrust::optional<size_t>{bucket_idx_next} : thrust::nullopt,
-        success ? thrust::optional<std::byte>{std::byte{0}} /* dummy */ : thrust::nullopt);
+        success ? cuda::std::optional<size_t>{bucket_idx_next} : cuda::std::nullopt,
+        success ? cuda::std::optional<std::byte>{std::byte{0}} /* dummy */ : cuda::std::nullopt);
     }
   }
 
   template <bool multi_gpu = GraphViewType::is_multi_gpu>
-  __device__ std::enable_if_t<!multi_gpu,
-                              thrust::tuple<thrust::optional<size_t>, thrust::optional<std::byte>>>
-  operator()(thrust::tuple<vertex_type, vertex_type> /* tagged_v */, int /* v_val */) const
+  __device__
+    std::enable_if_t<!multi_gpu,
+                     thrust::tuple<cuda::std::optional<size_t>, cuda::std::optional<std::byte>>>
+    operator()(thrust::tuple<vertex_type, vertex_type> /* tagged_v */, int /* v_val */) const
   {
-    return thrust::make_tuple(thrust::optional<size_t>{bucket_idx_next},
-                              thrust::optional<std::byte>{std::byte{0}} /* dummy */);
+    return thrust::make_tuple(cuda::std::optional<size_t>{bucket_idx_next},
+                              cuda::std::optional<std::byte>{std::byte{0}} /* dummy */);
   }
 };
 
diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh
index a2b6f6430f0..f1ff0912002 100644
--- a/cpp/src/cores/core_number_impl.cuh
+++ b/cpp/src/cores/core_number_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/for_each.h>
@@ -35,7 +36,6 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/partition.h>
 #include <thrust/reduce.h>
 #include <thrust/remove.h>
@@ -53,10 +53,10 @@ struct e_op_t {
   size_t k{};
   edge_t delta{};
 
-  __device__ thrust::optional<edge_t> operator()(
-    vertex_t, vertex_t, thrust::nullopt_t, edge_t dst_val, thrust::nullopt_t) const
+  __device__ cuda::std::optional<edge_t> operator()(
+    vertex_t, vertex_t, cuda::std::nullopt_t, edge_t dst_val, cuda::std::nullopt_t) const
   {
-    return dst_val >= k ? thrust::optional<edge_t>{delta} : thrust::nullopt;
+    return dst_val >= k ? cuda::std::optional<edge_t>{delta} : cuda::std::nullopt;
   }
 };
 
@@ -251,8 +251,8 @@ void core_number(raft::handle_t const& handle,
               auto new_core_number = v_val >= pushed_val ? v_val - pushed_val : edge_t{0};
               new_core_number      = new_core_number < (k - delta) ? (k - delta) : new_core_number;
               new_core_number      = new_core_number < k_first ? edge_t{0} : new_core_number;
-              return thrust::make_tuple(thrust::optional<size_t>{bucket_idx_next},
-                                        thrust::optional<edge_t>{new_core_number});
+              return thrust::make_tuple(cuda::std::optional<size_t>{bucket_idx_next},
+                                        cuda::std::optional<edge_t>{new_core_number});
             });
         }
 
diff --git a/cpp/src/lookup/lookup_src_dst_impl.cuh b/cpp/src/lookup/lookup_src_dst_impl.cuh
index 45bbf870d80..dd03e621022 100644
--- a/cpp/src/lookup/lookup_src_dst_impl.cuh
+++ b/cpp/src/lookup/lookup_src_dst_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,6 +29,8 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
+
 namespace cugraph {
 
 template <typename edge_id_t, typename edge_type_t, typename vertex_t, typename value_t>
@@ -370,7 +372,7 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku
   EdgeIdInputWrapper edge_id_view,
   EdgeTypeInputWrapper edge_type_view)
 {
-  static_assert(!std::is_same_v<typename EdgeIdInputWrapper::value_type, thrust::nullopt_t>,
+  static_assert(!std::is_same_v<typename EdgeIdInputWrapper::value_type, cuda::std::nullopt_t>,
                 "Can not create edge id lookup table without edge ids");
 
   using vertex_t    = typename GraphViewType::vertex_type;
@@ -411,17 +413,17 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku
         cugraph::edge_src_dummy_property_t{}.view(),
         cugraph::edge_dst_dummy_property_t{}.view(),
         view_concat(edge_id_view, edge_type_view),
-        cuda::proclaim_return_type<thrust::optional<thrust::tuple<int, edge_type_t>>>(
+        cuda::proclaim_return_type<cuda::std::optional<thrust::tuple<int, edge_type_t>>>(
           [key_func =
              cugraph::detail::compute_gpu_id_from_ext_edge_id_t<edge_t>{
                comm_size,
                major_comm_size,
                minor_comm_size}] __device__(auto,
                                             auto,
-                                            thrust::nullopt_t,
-                                            thrust::nullopt_t,
+                                            cuda::std::nullopt_t,
+                                            cuda::std::nullopt_t,
                                             thrust::tuple<edge_t, edge_type_t> id_and_type) {
-            return thrust::optional<thrust::tuple<int, edge_type_t>>{thrust::make_tuple(
+            return cuda::std::optional<thrust::tuple<int, edge_type_t>>{thrust::make_tuple(
               key_func(thrust::get<0>(id_and_type)), thrust::get<1>(id_and_type))};
           }));
 
@@ -518,9 +520,9 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku
       cugraph::edge_src_dummy_property_t{}.view(),
       cugraph::edge_dst_dummy_property_t{}.view(),
       edge_type_view,
-      cuda::proclaim_return_type<thrust::optional<edge_type_t>>(
-        [] __device__(auto, auto, thrust::nullopt_t, thrust::nullopt_t, edge_type_t et) {
-          return thrust::optional<edge_type_t>{et};
+      cuda::proclaim_return_type<cuda::std::optional<edge_type_t>>(
+        [] __device__(auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_type_t et) {
+          return cuda::std::optional<edge_type_t>{et};
         }));
 
     thrust::sort(handle.get_thrust_policy(), edge_types.begin(), edge_types.end());
diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
index 2b89d214fd7..9816753852f 100644
--- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
+++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,6 +40,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -47,7 +48,6 @@
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/iterator_traits.h>
-#include <thrust/optional.h>
 #include <thrust/sort.h>
 #include <thrust/tuple.h>
 
@@ -138,7 +138,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   BufferKeyOutputIterator buffer_key_output_first,
   BufferValueOutputIterator buffer_value_output_first,
   size_t* buffer_idx_ptr,
@@ -238,7 +238,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree(
 
     if (edge_partition_e_mask) {
       for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) {
-        e_op_result_t e_op_result{thrust::nullopt};
+        e_op_result_t e_op_result{cuda::std::nullopt};
 
         if (i < static_cast<size_t>(num_edges_this_warp)) {
           auto key_idx_this_warp = static_cast<vertex_t>(thrust::distance(
@@ -261,7 +261,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree(
       }
     } else {
       for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) {
-        e_op_result_t e_op_result{thrust::nullopt};
+        e_op_result_t e_op_result{cuda::std::nullopt};
 
         if (i < static_cast<size_t>(num_edges_this_warp)) {
           auto key_idx_this_warp = static_cast<vertex_t>(thrust::distance(
@@ -304,7 +304,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   BufferKeyOutputIterator buffer_key_output_first,
   BufferValueOutputIterator buffer_value_output_first,
   size_t* buffer_idx_ptr,
@@ -359,7 +359,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree(
 
     if (edge_partition_e_mask) {
       for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) {
-        e_op_result_t e_op_result{thrust::nullopt};
+        e_op_result_t e_op_result{cuda::std::nullopt};
         if ((i < static_cast<size_t>(local_degree)) &&
             ((*edge_partition_e_mask).get(local_edge_offset + i))) {
           e_op_result = call_e_op(i);
@@ -370,7 +370,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree(
       }
     } else {
       for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) {
-        e_op_result_t e_op_result{thrust::nullopt};
+        e_op_result_t e_op_result{cuda::std::nullopt};
         if (i < static_cast<size_t>(local_degree)) { e_op_result = call_e_op(i); }
 
         warp_push_buffer_elements(
@@ -400,7 +400,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   BufferKeyOutputIterator buffer_key_output_first,
   BufferValueOutputIterator buffer_value_output_first,
   size_t* buffer_idx_ptr,
@@ -429,7 +429,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree(
     ((static_cast<size_t>(num_edges) + (raft::warp_size() - 1)) / raft::warp_size()) *
     raft::warp_size();
   while (idx < rounded_up_num_edges) {
-    e_op_result_t e_op_result{thrust::nullopt};
+    e_op_result_t e_op_result{cuda::std::nullopt};
     if (idx < num_edges) {
       auto key_idx = thrust::distance(
         key_local_degree_offsets.begin() + 1,
@@ -494,7 +494,7 @@ void extract_transform_v_frontier_e_edge_partition(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   OptionalOutputKeyIterator output_key_first,
   OptionalOutputValueIterator output_value_first,
   raft::device_span<size_t> count /* size = 1 */,
@@ -665,21 +665,21 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
                                                      EdgeOp>::type;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -692,12 +692,12 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
   static_assert(!std::is_same_v<e_op_result_t, void>);
   static_assert(
     std::is_same_v<e_op_result_t,
-                   std::conditional_t<!std::is_same_v<output_key_t, void> &&
-                                        !std::is_same_v<output_value_t, void>,
-                                      thrust::optional<thrust::tuple<output_key_t, output_value_t>>,
-                                      std::conditional_t<!std::is_same_v<output_key_t, void>,
-                                                         thrust::optional<output_key_t>,
-                                                         thrust::optional<output_value_t>>>>);
+                   std::conditional_t<
+                     !std::is_same_v<output_key_t, void> && !std::is_same_v<output_value_t, void>,
+                     cuda::std::optional<thrust::tuple<output_key_t, output_value_t>>,
+                     std::conditional_t<!std::is_same_v<output_key_t, void>,
+                                        cuda::std::optional<output_key_t>,
+                                        cuda::std::optional<output_value_t>>>>);
 
   constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v<key_t, vertex_t> &&
                               KeyBucketType::is_sorted_unique;
@@ -1401,10 +1401,10 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
           graph_view.local_edge_partition_view(partition_idx));
       auto edge_partition_e_mask =
         edge_mask_view
-          ? thrust::make_optional<
+          ? cuda::std::make_optional<
               detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
               *edge_mask_view, partition_idx)
-          : thrust::nullopt;
+          : cuda::std::nullopt;
       size_t num_streams_per_loop{1};
       if (stream_pool_indices) {
         assert((*stream_pool_indices).size() >= num_concurrent_loops);
diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh
index 847c1db6937..d21a8153dc6 100644
--- a/cpp/src/prims/detail/nbr_intersection.cuh
+++ b/cpp/src/prims/detail/nbr_intersection.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,6 +36,7 @@
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/polymorphic_allocator.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -49,7 +50,6 @@
 #include <thrust/iterator/iterator_traits.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/remove.h>
 #include <thrust/scan.h>
@@ -106,7 +106,7 @@ struct update_rx_major_local_degree_t {
   int minor_comm_size{};
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
-  thrust::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
+  cuda::std::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
     edge_partition_e_mask{};
 
   size_t reordered_idx_first{};
@@ -155,7 +155,7 @@ struct update_rx_major_local_nbrs_t {
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
   edge_partition_e_input_device_view_t edge_partition_e_value_input{};
-  thrust::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
+  cuda::std::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
     edge_partition_e_mask{};
 
   size_t reordered_idx_first{};
@@ -214,7 +214,7 @@ struct update_rx_major_local_nbrs_t {
     if (local_degree > 0) {
       if (edge_partition_e_mask) {
         auto mask_first = (*edge_partition_e_mask).value_first();
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           auto input_first =
             thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first());
           copy_if_mask_set(input_first,
@@ -233,7 +233,7 @@ struct update_rx_major_local_nbrs_t {
                            local_degree);
         }
       } else {
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           auto input_first =
             thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) +
             edge_offset;
@@ -278,7 +278,7 @@ struct pick_min_degree_t {
   raft::device_span<edge_t const> second_element_offsets{};
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
-  thrust::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
+  cuda::std::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
     edge_partition_e_mask{};
 
   __device__ edge_t operator()(thrust::tuple<vertex_t, vertex_t> pair) const
@@ -413,7 +413,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
   edge_partition_e_input_device_view_t edge_partition_e_value_input{};
-  thrust::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
+  cuda::std::optional<edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>
     edge_partition_e_mask{};
 
   VertexPairIterator vertex_pair_first;
@@ -430,7 +430,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
 
     auto pair = *(vertex_pair_first + i);
     vertex_t const* indices0{};
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        edge_property_value_t const*,
                        void*>
       edge_property_values0{};
@@ -439,7 +439,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
     edge_t local_degree0{0};
     if constexpr (std::is_same_v<FirstElementToIdxMap, void*>) {
       indices0 = edge_partition.indices();
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         edge_property_values0 = edge_partition_e_value_input.value_first();
       }
 
@@ -468,7 +468,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
       }
     } else {
       indices0 = first_element_indices.begin();
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         edge_property_values0 = first_element_edge_property_values;
       }
 
@@ -478,7 +478,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
     }
 
     vertex_t const* indices1{};
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        edge_property_value_t const*,
                        void*>
       edge_property_values1{};
@@ -487,7 +487,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
     edge_t local_degree1{0};
     if constexpr (std::is_same_v<SecondElementToIdxMap, void*>) {
       indices1 = edge_partition.indices();
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         edge_property_values1 = edge_partition_e_value_input.value_first();
       }
 
@@ -516,7 +516,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
       }
     } else {
       indices1 = second_element_indices.begin();
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         edge_property_values1 = second_element_edge_property_values;
       }
 
@@ -618,7 +618,7 @@ struct gatherv_indices_t {
     // in a single warp (better optimize if this becomes a performance bottleneck)
 
     for (int j = 0; j < minor_comm_size; ++j) {
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         auto zipped_gathered_begin = thrust::make_zip_iterator(
           thrust::make_tuple(gathered_intersection_indices.begin(),
                              gathered_nbr_intersection_e_property_values0,
@@ -664,7 +664,7 @@ struct gatherv_indices_t {
 // number of groups"  is recommended for load-balancing.
 template <typename GraphViewType, typename VertexPairIterator, typename EdgeValueInputIterator>
 std::conditional_t<
-  !std::is_same_v<typename EdgeValueInputIterator::value_type, thrust::nullopt_t>,
+  !std::is_same_v<typename EdgeValueInputIterator::value_type, cuda::std::nullopt_t>,
   std::tuple<rmm::device_uvector<size_t>,
              rmm::device_uvector<typename GraphViewType::vertex_type>,
              rmm::device_uvector<typename EdgeValueInputIterator::value_type>,
@@ -684,7 +684,7 @@ nbr_intersection(raft::handle_t const& handle,
   using edge_property_value_t = typename EdgeValueInputIterator::value_type;
 
   using edge_partition_e_input_device_view_t =
-    std::conditional_t<std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
                        detail::edge_partition_edge_property_device_view_t<
                          edge_t,
@@ -692,16 +692,16 @@ nbr_intersection(raft::handle_t const& handle,
                          edge_property_value_t>>;
 
   using optional_property_buffer_value_type =
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        edge_property_value_t,
                        void>;
 
   using optional_property_buffer_view_t =
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        edge_property_value_t const*,
                        void*>;
   using optional_property_buffer_mutable_view_t =
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        edge_property_value_t*,
                        void*>;
 
@@ -907,11 +907,11 @@ nbr_intersection(raft::handle_t const& handle,
               graph_view.local_edge_partition_view(i));
           auto edge_partition_e_mask =
             edge_mask_view
-              ? thrust::make_optional<
+              ? cuda::std::make_optional<
                   detail::
                     edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
                   *edge_mask_view, i)
-              : thrust::nullopt;
+              : cuda::std::nullopt;
           auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
           auto reordered_idx_first =
             (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1];
@@ -950,7 +950,7 @@ nbr_intersection(raft::handle_t const& handle,
 
         optional_property_buffer_mutable_view_t optional_local_e_property_values{};
 
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           local_e_property_values_for_rx_majors.resize(local_nbrs_for_rx_majors.size(),
                                                        handle.get_stream());
           optional_local_e_property_values = local_e_property_values_for_rx_majors.data();
@@ -964,11 +964,11 @@ nbr_intersection(raft::handle_t const& handle,
             edge_partition_e_input_device_view_t(edge_value_input, i);
           auto edge_partition_e_mask =
             edge_mask_view
-              ? thrust::make_optional<
+              ? cuda::std::make_optional<
                   detail::
                     edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
                   *edge_mask_view, i)
-              : thrust::nullopt;
+              : cuda::std::nullopt;
 
           auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
           auto reordered_idx_first =
@@ -1045,7 +1045,7 @@ nbr_intersection(raft::handle_t const& handle,
       std::tie(major_nbr_indices, std::ignore) = shuffle_values(
         major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream());
 
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         std::tie(major_e_property_values, std::ignore) =
           shuffle_values(major_comm,
                          local_e_property_values_for_rx_majors.begin(),
@@ -1132,16 +1132,18 @@ nbr_intersection(raft::handle_t const& handle,
     edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions());
     edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions());
 
-    [[maybe_unused]] std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
-                                        std::vector<rmm::device_uvector<edge_property_value_t>>,
-                                        std::byte /* dummy */>
+    [[maybe_unused]] std::conditional_t<
+      !std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
+      std::vector<rmm::device_uvector<edge_property_value_t>>,
+      std::byte /* dummy */>
       edge_partition_nbr_intersection_e_property_values0{};
-    [[maybe_unused]] std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
-                                        std::vector<rmm::device_uvector<edge_property_value_t>>,
-                                        std::byte /* dummy */>
+    [[maybe_unused]] std::conditional_t<
+      !std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
+      std::vector<rmm::device_uvector<edge_property_value_t>>,
+      std::byte /* dummy */>
       edge_partition_nbr_intersection_e_property_values1{};
 
-    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       edge_partition_nbr_intersection_e_property_values0.reserve(
         graph_view.number_of_local_edge_partitions());
       edge_partition_nbr_intersection_e_property_values1.reserve(
@@ -1198,10 +1200,10 @@ nbr_intersection(raft::handle_t const& handle,
           edge_partition_e_input_device_view_t(edge_value_input, i);
         auto edge_partition_e_mask =
           edge_mask_view
-            ? thrust::make_optional<
+            ? cuda::std::make_optional<
                 detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
                 *edge_mask_view, i)
-            : thrust::nullopt;
+            : cuda::std::nullopt;
 
         auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
 
@@ -1249,7 +1251,7 @@ nbr_intersection(raft::handle_t const& handle,
         optional_property_buffer_mutable_view_t
           rx_v_pair_optional_nbr_intersection_e_property_values1{};
 
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           rx_v_pair_nbr_intersection_e_property_values0.resize(
             rx_v_pair_nbr_intersection_indices.size(), handle.get_stream());
           rx_v_pair_nbr_intersection_e_property_values1.resize(
@@ -1264,7 +1266,7 @@ nbr_intersection(raft::handle_t const& handle,
 
         if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) {
           optional_property_buffer_view_t optional_major_e_property_values{};
-          if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
             optional_major_e_property_values = major_e_property_values.data();
           }
 
@@ -1309,7 +1311,7 @@ nbr_intersection(raft::handle_t const& handle,
           CUGRAPH_FAIL("unimplemented.");
         }
 
-        if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           rx_v_pair_nbr_intersection_indices.resize(
             thrust::distance(rx_v_pair_nbr_intersection_indices.begin(),
                              thrust::remove(handle.get_thrust_policy(),
@@ -1515,7 +1517,7 @@ nbr_intersection(raft::handle_t const& handle,
             rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(),
             handle.get_stream());
 
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           device_multicast_sendrecv(minor_comm,
                                     rx_v_pair_nbr_intersection_e_property_values0.begin(),
                                     rx_v_pair_nbr_intersection_index_tx_counts,
@@ -1548,7 +1550,7 @@ nbr_intersection(raft::handle_t const& handle,
             gathered_nbr_intersection_e_property_values1.size(), handle.get_stream());
         }
 
-        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
           thrust::for_each(
             handle.get_thrust_policy(),
             thrust::make_counting_iterator(size_t{0}),
@@ -1598,7 +1600,7 @@ nbr_intersection(raft::handle_t const& handle,
       edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes));
       edge_partition_nbr_intersection_indices.push_back(
         std::move(combined_nbr_intersection_indices));
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         edge_partition_nbr_intersection_e_property_values0.push_back(
           std::move(combined_nbr_intersection_e_property_values0));
         edge_partition_nbr_intersection_e_property_values1.push_back(
@@ -1612,7 +1614,7 @@ nbr_intersection(raft::handle_t const& handle,
       num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size();
     }
     nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream());
-    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(),
                                                  handle.get_stream());
       nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(),
@@ -1631,7 +1633,7 @@ nbr_intersection(raft::handle_t const& handle,
                    edge_partition_nbr_intersection_indices[i].end(),
                    nbr_intersection_indices.begin() + index_offset);
 
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         thrust::copy(handle.get_thrust_policy(),
                      edge_partition_nbr_intersection_e_property_values0[i].begin(),
                      edge_partition_nbr_intersection_e_property_values0[i].end(),
@@ -1660,10 +1662,10 @@ nbr_intersection(raft::handle_t const& handle,
     auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0);
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, 0)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     rmm::device_uvector<edge_t> nbr_intersection_sizes(
       input_size,
@@ -1699,7 +1701,7 @@ nbr_intersection(raft::handle_t const& handle,
     optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values0{};
     optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values1{};
 
-    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(),
                                                  handle.get_stream());
       nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(),
@@ -1770,7 +1772,7 @@ nbr_intersection(raft::handle_t const& handle,
         size_t{1} << 27,
         static_cast<size_t>(thrust::distance(nbr_intersection_indices.begin() + num_scanned,
                                              nbr_intersection_indices.end())));
-      if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         num_copied += static_cast<size_t>(thrust::distance(
           tmp_indices.begin() + num_copied,
           thrust::copy_if(handle.get_thrust_policy(),
@@ -1804,12 +1806,12 @@ nbr_intersection(raft::handle_t const& handle,
       num_scanned += this_scan_size;
     }
     nbr_intersection_indices = std::move(tmp_indices);
-    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       nbr_intersection_e_property_values0 = std::move(tmp_property_values0);
       nbr_intersection_e_property_values1 = std::move(tmp_property_values1);
     }
 #else
-    if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       nbr_intersection_indices.resize(
         thrust::distance(nbr_intersection_indices.begin(),
                          thrust::remove(handle.get_thrust_policy(),
@@ -1845,7 +1847,7 @@ nbr_intersection(raft::handle_t const& handle,
 
   // 5. Return
 
-  if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+  if constexpr (std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
     return std::make_tuple(std::move(nbr_intersection_offsets),
                            std::move(nbr_intersection_indices));
 
diff --git a/cpp/src/prims/detail/partition_v_frontier.cuh b/cpp/src/prims/detail/partition_v_frontier.cuh
index 018960d9a54..f5249aec304 100644
--- a/cpp/src/prims/detail/partition_v_frontier.cuh
+++ b/cpp/src/prims/detail/partition_v_frontier.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,14 +35,12 @@
 #include <thrust/count.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
 #include <thrust/tuple.h>
 #include <thrust/unique.h>
 
-#include <optional>
 #include <tuple>
 
 namespace cugraph {
diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
index c521774a50d..1e47bb53a9e 100644
--- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
+++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh
@@ -46,6 +46,7 @@
 
 #include <cub/cub.cuh>
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
@@ -54,7 +55,6 @@
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/scatter.h>
 #include <thrust/set_operations.h>
 #include <thrust/transform_reduce.h>
@@ -265,7 +265,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -296,7 +296,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse(
   while (idx < key_count) {
     key_t key{};
     vertex_t major{};
-    thrust::optional<vertex_t> major_idx{};
+    cuda::std::optional<vertex_t> major_idx{};
     if constexpr (use_input_key) {
       key       = *(key_first + idx);
       major     = thrust_tuple_get_or_identity<key_t, 0>(key);
@@ -402,7 +402,7 @@ __global__ static void per_v_transform_reduce_e_low_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -512,7 +512,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -596,7 +596,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree(
           ((static_cast<size_t>(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) *
           raft::warp_size();
         for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) {
-          thrust::optional<T> e_op_result{thrust::nullopt};
+          cuda::std::optional<T> e_op_result{cuda::std::nullopt};
           if ((i < static_cast<size_t>(local_degree)) &&
               (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) {
             e_op_result = call_e_op(i);
@@ -630,7 +630,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree(
           ((static_cast<size_t>(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) *
           raft::warp_size();
         for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) {
-          thrust::optional<T> e_op_result{thrust::nullopt};
+          cuda::std::optional<T> e_op_result{cuda::std::nullopt};
           if (i < static_cast<size_t>(local_degree) && call_pred_op(i)) {
             e_op_result = call_e_op(i);
           }
@@ -699,7 +699,7 @@ __global__ static void per_v_transform_reduce_e_high_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -790,7 +790,7 @@ __global__ static void per_v_transform_reduce_e_high_degree(
            per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) *
           per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size;
         for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) {
-          thrust::optional<T> e_op_result{thrust::nullopt};
+          cuda::std::optional<T> e_op_result{cuda::std::nullopt};
           if ((i < static_cast<size_t>(local_degree)) &&
               (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) {
             e_op_result = call_e_op(i);
@@ -835,7 +835,7 @@ __global__ static void per_v_transform_reduce_e_high_degree(
            per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) *
           per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size;
         for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) {
-          thrust::optional<T> e_op_result{thrust::nullopt};
+          cuda::std::optional<T> e_op_result{cuda::std::nullopt};
           if ((i < static_cast<size_t>(local_degree)) && call_pred_op(i)) {
             e_op_result = call_e_op(i);
           }
@@ -1141,7 +1141,7 @@ void per_v_transform_reduce_e_edge_partition(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper output_buffer,
   EdgeOp e_op,
   T major_init,
@@ -1415,21 +1415,21 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
     typename iterator_value_type_or_default_t<OptionalKeyIterator, vertex_t>::value_type;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -1519,10 +1519,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(static_cast<size_t>(minor_comm_rank)));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, static_cast<size_t>(minor_comm_rank))
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     std::optional<std::vector<size_t>> edge_partition_stream_pool_indices{std::nullopt};
     if (local_vertex_partition_segment_offsets && (handle.get_stream_pool_size() >= max_segments)) {
@@ -1737,10 +1737,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
        sorted_unique_key_first,
        sorted_unique_nzd_key_last,
        deg1_v_first = (filter_input_key && graph_view.use_dcs())
-                        ? thrust::make_optional(graph_view.local_vertex_partition_range_first() +
-                                                (*local_vertex_partition_segment_offsets)[3] +
-                                                *((*hypersparse_degree_offsets).rbegin() + 1))
-                        : thrust::nullopt,
+                        ? cuda::std::make_optional(graph_view.local_vertex_partition_range_first() +
+                                                   (*local_vertex_partition_segment_offsets)[3] +
+                                                   *((*hypersparse_degree_offsets).rbegin() + 1))
+                        : cuda::std::nullopt,
        vertex_partition_range_first =
          graph_view.local_vertex_partition_range_first()] __device__(size_t i) {
         if (i == 0) {
@@ -3102,10 +3102,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
             graph_view.local_edge_partition_view(partition_idx));
         auto edge_partition_e_mask =
           edge_mask_view
-            ? thrust::make_optional<
+            ? cuda::std::make_optional<
                 detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
                 *edge_mask_view, partition_idx)
-            : thrust::nullopt;
+            : cuda::std::nullopt;
         size_t num_streams_per_loop{1};
         if (stream_pool_indices) {
           assert((*stream_pool_indices).size() >= num_concurrent_loops);
diff --git a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh
index dd0da77851b..3e38b85f105 100644
--- a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh
+++ b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,12 +38,12 @@
 #include <cub/cub.cuh>
 #include <cuda/atomic>
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/adjacent_difference.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -474,10 +474,10 @@ compute_valid_local_nbr_count_inclusive_sums(
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_frontier_major_first =
       aggregate_local_frontier_major_first + local_frontier_displacements[i];
@@ -1255,10 +1255,10 @@ compute_aggregate_local_frontier_local_degrees(
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_frontier_major_first =
       aggregate_local_frontier_major_first + local_frontier_displacements[i];
@@ -1501,9 +1501,9 @@ rmm::device_uvector<typename GraphViewType::edge_type> convert_to_unmasked_local
     thrust::make_counting_iterator(size_t{0}),
     cuda::proclaim_return_type<size_t>(
       [K,
-       key_indices = key_indices ? thrust::make_optional<raft::device_span<size_t const>>(
+       key_indices = key_indices ? cuda::std::make_optional<raft::device_span<size_t const>>(
                                      (*key_indices).data(), (*key_indices).size())
-                                 : thrust::nullopt] __device__(size_t i) {
+                                 : cuda::std::nullopt] __device__(size_t i) {
         return key_indices ? (*key_indices)[i] : i / K;
       }));
   auto pair_first = thrust::make_zip_iterator(local_nbr_indices.begin(), sample_major_idx_first);
@@ -1513,10 +1513,10 @@ rmm::device_uvector<typename GraphViewType::edge_type> convert_to_unmasked_local
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_frontier_major_first =
       aggregate_local_frontier_major_first + local_frontier_displacements[i];
@@ -1863,10 +1863,10 @@ biased_sample_and_compute_local_nbr_indices(
            sample_local_random_numbers.data() + local_frontier_sample_offsets[i],
            local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i]),
          key_indices =
-           key_indices ? thrust::make_optional<raft::device_span<size_t const>>(
+           key_indices ? cuda::std::make_optional<raft::device_span<size_t const>>(
                            (*key_indices).data() + local_frontier_sample_offsets[i],
                            local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i])
-                       : thrust::nullopt,
+                       : cuda::std::nullopt,
          key_idx_to_unique_key_idx =
            raft::device_span<size_t>(aggregate_local_frontier_key_idx_to_unique_key_idx.data() +
                                        local_frontier_displacements[i],
diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh
index 5ebcddfe8da..8f414391596 100644
--- a/cpp/src/prims/detail/transform_v_frontier_e.cuh
+++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,9 +29,9 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <type_traits>
@@ -389,21 +389,21 @@ auto transform_v_frontier_e(raft::handle_t const& handle,
   static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic<e_op_result_t>::value);
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -424,10 +424,10 @@ auto transform_v_frontier_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_frontier_key_first =
       aggregate_local_frontier_key_first + local_frontier_displacements[i];
@@ -470,10 +470,10 @@ auto transform_v_frontier_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_frontier_key_first =
       aggregate_local_frontier_key_first + local_frontier_displacements[i];
diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh
index 5741c98d90e..bb003aa8747 100644
--- a/cpp/src/prims/extract_transform_e.cuh
+++ b/cpp/src/prims/extract_transform_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -69,9 +69,9 @@ namespace cugraph {
  * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not
  * access edge property values).
  * @param e_op Quinary operator takes edge source, edge destination, property values for the source,
- * property values for the destination, and property values for the edge and returns thrust::nullopt
- * (if the return value is to be discarded) or a valid @p e_op output to be extracted and
- * accumulated.
+ * property values for the destination, and property values for the edge and returns
+ * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be
+ * extracted and accumulated.
  * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
  * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values.
  */
diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh
index ba227b263bc..46984d6b4e5 100644
--- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh
+++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,9 +57,9 @@ namespace cugraph {
  * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not
  * access edge property values).
  * @param e_op Quinary operator takes edge source, edge destination, property values for the source,
- * property values for the destination, and property values for the edge and returns thrust::nullopt
- * (if the return value is to be discarded) or a valid @p e_op output to be extracted and
- * accumulated.
+ * property values for the destination, and property values for the edge and returns
+ * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be
+ * extracted and accumulated.
  * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
  * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values.
  */
diff --git a/cpp/src/prims/fill_edge_property.cuh b/cpp/src/prims/fill_edge_property.cuh
index 54d0c454ec2..3e1383707a2 100644
--- a/cpp/src/prims/fill_edge_property.cuh
+++ b/cpp/src/prims/fill_edge_property.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/fill.h>
 #include <thrust/iterator/constant_iterator.h>
 
@@ -50,10 +51,10 @@ void fill_edge_property(raft::handle_t const& handle,
   for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     if constexpr (cugraph::has_packed_bool_element<
                     std::remove_reference_t<decltype(value_firsts[i])>,
diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
index f03e8f54fb2..728c1eac2bd 100644
--- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
+++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,6 +32,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -43,7 +44,6 @@
 #include <thrust/iterator/iterator_traits.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/merge.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -109,7 +109,7 @@ struct call_intersection_op_t {
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu>
     edge_partition{};
-  thrust::optional<raft::device_span<typename GraphViewType::vertex_type const>> unique_vertices;
+  cuda::std::optional<raft::device_span<typename GraphViewType::vertex_type const>> unique_vertices;
   VertexValueInputIterator vertex_property_first;
   IntersectionOp intersection_op{};
   size_t const* nbr_offsets{nullptr};
@@ -135,17 +135,17 @@ struct call_intersection_op_t {
     auto intersection = raft::device_span<typename GraphViewType::vertex_type const>(
       nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]);
 
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        raft::device_span<edge_property_value_t const>,
                        std::byte /* dummy */>
       property_values0{};
 
-    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+    std::conditional_t<!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>,
                        raft::device_span<edge_property_value_t const>,
                        std::byte /* dummy */>
       property_values1{};
 
-    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
       property_values0 = raft::device_span<edge_property_value_t const>(
         nbr_intersection_property_values0 + nbr_offsets[i],
         nbr_intersection_property_values0 + +nbr_offsets[i + 1]);
@@ -392,7 +392,7 @@ void per_v_pair_transform_dst_nbr_intersection(
       [[maybe_unused]] rmm::device_uvector<edge_property_value_t>
         r_nbr_intersection_property_values1(size_t{0}, handle.get_stream());
 
-      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_property_value_t, cuda::std::nullopt_t>) {
         std::tie(intersection_offsets,
                  intersection_indices,
                  r_nbr_intersection_property_values0,
@@ -430,7 +430,7 @@ void per_v_pair_transform_dst_nbr_intersection(
                            VertexPairIterator,
                            VertexPairValueOutputIterator>{
                            edge_partition,
-                           thrust::make_optional<raft::device_span<vertex_t const>>(
+                           cuda::std::make_optional<raft::device_span<vertex_t const>>(
                              (*sorted_unique_vertices).data(), (*sorted_unique_vertices).size()),
                            vertex_value_input_for_sorted_unique_vertices_first,
                            intersection_op,
@@ -442,28 +442,29 @@ void per_v_pair_transform_dst_nbr_intersection(
                            vertex_pair_first,
                            vertex_pair_value_output_first});
       } else {
-        thrust::for_each(handle.get_thrust_policy(),
-                         thrust::make_counting_iterator(size_t{0}),
-                         thrust::make_counting_iterator(this_chunk_size),
-                         detail::call_intersection_op_t<
-                           GraphViewType,
-                           VertexValueInputIterator,
-                           typename decltype(r_nbr_intersection_property_values0)::const_pointer,
-                           IntersectionOp,
-                           decltype(chunk_vertex_pair_index_first),
-                           VertexPairIterator,
-                           VertexPairValueOutputIterator>{
-                           edge_partition,
-                           thrust::optional<raft::device_span<vertex_t const>>{thrust::nullopt},
-                           vertex_value_input_first,
-                           intersection_op,
-                           intersection_offsets.data(),
-                           intersection_indices.data(),
-                           r_nbr_intersection_property_values0.data(),
-                           r_nbr_intersection_property_values1.data(),
-                           chunk_vertex_pair_index_first,
-                           vertex_pair_first,
-                           vertex_pair_value_output_first});
+        thrust::for_each(
+          handle.get_thrust_policy(),
+          thrust::make_counting_iterator(size_t{0}),
+          thrust::make_counting_iterator(this_chunk_size),
+          detail::call_intersection_op_t<
+            GraphViewType,
+            VertexValueInputIterator,
+            typename decltype(r_nbr_intersection_property_values0)::const_pointer,
+            IntersectionOp,
+            decltype(chunk_vertex_pair_index_first),
+            VertexPairIterator,
+            VertexPairValueOutputIterator>{
+            edge_partition,
+            cuda::std::optional<raft::device_span<vertex_t const>>{cuda::std::nullopt},
+            vertex_value_input_first,
+            intersection_op,
+            intersection_offsets.data(),
+            intersection_indices.data(),
+            r_nbr_intersection_property_values0.data(),
+            r_nbr_intersection_property_values1.data(),
+            chunk_vertex_pair_index_first,
+            vertex_pair_first,
+            vertex_pair_value_output_first});
       }
 
       chunk_vertex_pair_index_first += this_chunk_size;
diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
index 30706632ad2..812e0a9b926 100644
--- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
+++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,11 +35,11 @@
 #include <cub/cub.cuh>
 #include <cuda/atomic>
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -93,7 +93,7 @@ struct transform_local_nbr_indices_t {
   using edge_t   = typename GraphViewType::edge_type;
 
   edge_partition_device_view_t<vertex_t, edge_t, GraphViewType::is_multi_gpu> edge_partition{};
-  thrust::optional<size_t const*> local_key_indices{thrust::nullopt};
+  cuda::std::optional<size_t const*> local_key_indices{cuda::std::nullopt};
   KeyIterator key_first{};
   LocalNbrIdxIterator local_nbr_idx_first{};
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input;
@@ -101,7 +101,7 @@ struct transform_local_nbr_indices_t {
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input;
   EdgeOp e_op{};
   edge_t invalid_idx{};
-  thrust::optional<T> invalid_value{thrust::nullopt};
+  cuda::std::optional<T> invalid_value{cuda::std::nullopt};
   size_t K{};
 
   __device__ T operator()(size_t i) const
@@ -241,21 +241,21 @@ per_v_random_select_transform_e(raft::handle_t const& handle,
   using key_buffer_t = dataframe_buffer_type_t<key_t>;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -430,7 +430,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle,
                                       EdgeOp,
                                       T>{
           edge_partition,
-          thrust::make_optional(edge_partition_sample_key_index_first),
+          cuda::std::make_optional(edge_partition_sample_key_index_first),
           edge_partition_key_list_first,
           edge_partition_sample_local_nbr_index_first,
           edge_partition_src_value_input,
@@ -454,7 +454,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle,
                                       edge_partition_e_input_device_view_t,
                                       EdgeOp,
                                       T>{edge_partition,
-                                         thrust::nullopt,
+                                         cuda::std::nullopt,
                                          edge_partition_key_list_first,
                                          edge_partition_sample_local_nbr_index_first,
                                          edge_partition_src_value_input,
diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
index c13816242bc..4ebda9d42c4 100644
--- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
+++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,6 +40,7 @@
 #include <rmm/mr/device/polymorphic_allocator.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/distance.h>
@@ -49,7 +50,6 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/scatter.h>
 #include <thrust/sort.h>
@@ -89,15 +89,16 @@ struct tuple_to_minor_comm_rank_t {
   int minor_comm_size{};
 
   template <typename edge_value_type = edge_value_t>
-  __device__ std::enable_if_t<!std::is_same_v<edge_value_type, thrust::nullopt_t>, int> operator()(
+  __device__ std::enable_if_t<!std::is_same_v<edge_value_type, cuda::std::nullopt_t>, int>
+  operator()(
     thrust::tuple<vertex_t, vertex_t, edge_value_t> val /* major, minor key, edge value */) const
   {
     return key_func(thrust::get<1>(val)) % minor_comm_size;
   }
 
   template <typename edge_value_type = edge_value_t>
-  __device__ std::enable_if_t<std::is_same_v<edge_value_type, thrust::nullopt_t>, int> operator()(
-    thrust::tuple<vertex_t, vertex_t> val /* major, minor key */) const
+  __device__ std::enable_if_t<std::is_same_v<edge_value_type, cuda::std::nullopt_t>, int>
+  operator()(thrust::tuple<vertex_t, vertex_t> val /* major, minor key */) const
   {
     return key_func(thrust::get<1>(val)) % minor_comm_size;
   }
@@ -123,13 +124,13 @@ template <typename vertex_t,
           typename KeyAggregatedEdgeOp>
 struct call_key_aggregated_e_op_t {
   EdgePartitionDeviceView edge_partition{};
-  thrust::optional<EdgeMajorValueMap> edge_major_value_map{};
+  cuda::std::optional<EdgeMajorValueMap> edge_major_value_map{};
   EdgePartitionMajorValueInputWrapper edge_partition_major_value_input{};
   EdgeMinorKeyValueMap edge_minor_key_value_map{};
   KeyAggregatedEdgeOp key_aggregated_e_op{};
 
   template <typename edge_value_type = edge_value_t>
-  __device__ std::enable_if_t<!std::is_same_v<edge_value_type, thrust::nullopt_t>, e_op_result_t>
+  __device__ std::enable_if_t<!std::is_same_v<edge_value_type, cuda::std::nullopt_t>, e_op_result_t>
   operator()(thrust::tuple<vertex_t, vertex_t, edge_value_t>
                val /* major, minor key, aggregated edge value */) const
   {
@@ -145,7 +146,7 @@ struct call_key_aggregated_e_op_t {
   }
 
   template <typename edge_value_type = edge_value_t>
-  __device__ std::enable_if_t<std::is_same_v<edge_value_type, thrust::nullopt_t>, e_op_result_t>
+  __device__ std::enable_if_t<std::is_same_v<edge_value_type, cuda::std::nullopt_t>, e_op_result_t>
   operator()(thrust::tuple<vertex_t, vertex_t> val /* major, minor key */) const
   {
     auto major     = thrust::get<0>(val);
@@ -155,7 +156,7 @@ struct call_key_aggregated_e_op_t {
                        : edge_partition_major_value_input.get(
                            edge_partition.major_offset_from_major_nocheck(major));
     return key_aggregated_e_op(
-      major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), thrust::nullopt);
+      major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), cuda::std::nullopt);
   }
 };
 
@@ -284,16 +285,16 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
   using edge_value_t     = typename EdgeValueInputWrapper::value_type;
   using kv_pair_value_t  = typename KVStoreViewType::value_type;
   using optional_edge_value_buffer_value_type =
-    std::conditional_t<!std::is_same_v<edge_value_t, thrust::nullopt_t>, edge_value_t, void>;
+    std::conditional_t<!std::is_same_v<edge_value_t, cuda::std::nullopt_t>, edge_value_t, void>;
 
   static_assert(
-    std::is_same_v<edge_value_t, thrust::nullopt_t> || std::is_arithmetic_v<edge_value_t>,
+    std::is_same_v<edge_value_t, cuda::std::nullopt_t> || std::is_arithmetic_v<edge_value_t>,
     "Currently only scalar values are supported, should be extended to support thrust::tuple of "
     "arithmetic types and void (for dummy property values) to be consistent with other "
     "primitives.");  // this will also require a custom edge value aggregation op.
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
@@ -303,7 +304,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
       vertex_t,
       typename EdgeDstKeyInputWrapper::value_iterator>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -315,7 +316,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
 
   auto total_global_mem = handle.get_device_properties().totalGlobalMem;
   size_t element_size   = sizeof(vertex_t) * 2;  // major + minor keys
-  if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+  if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
     static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic<edge_value_t>::value);
     if constexpr (is_thrust_tuple_of_arithmetic<edge_value_t>::value) {
       element_size += sum_thrust_tuple_element_sizes<edge_value_t>();
@@ -323,7 +324,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
       element_size += sizeof(edge_value_t);
     }
   }
-  if constexpr (!std::is_same_v<edge_src_value_t, thrust::nullopt_t>) {
+  if constexpr (!std::is_same_v<edge_src_value_t, cuda::std::nullopt_t>) {
     static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic<edge_src_value_t>::value);
     if constexpr (is_thrust_tuple_of_arithmetic<edge_src_value_t>::value) {
       element_size += sum_thrust_tuple_element_sizes<edge_src_value_t>();
@@ -350,10 +351,10 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     auto edge_partition_src_value_input =
       edge_partition_src_input_device_view_t(edge_src_value_input, i);
@@ -472,7 +473,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
                             1,
                             handle.get_stream());
           handle.sync_stream();
-          if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+          if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
             detail::copy_if_mask_set(
               handle,
               thrust::make_zip_iterator(minor_key_first,
@@ -505,7 +506,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
           (offsets_with_mask ? (*offsets_with_mask).data() : edge_partition.offsets()) +
             h_vertex_offsets[j],
           detail::rebase_offset_t<edge_t>{h_edge_offsets[j]});
-        if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
           cub::DeviceSegmentedSort::SortPairs(
             static_cast<void*>(nullptr),
             tmp_storage_bytes,
@@ -536,7 +537,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
         if (tmp_storage_bytes > d_tmp_storage.size()) {
           d_tmp_storage = rmm::device_uvector<std::byte>(tmp_storage_bytes, handle.get_stream());
         }
-        if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
           cub::DeviceSegmentedSort::SortPairs(
             d_tmp_storage.data(),
             tmp_storage_bytes,
@@ -573,7 +574,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
           thrust::make_zip_iterator(unreduced_majors.begin(), unreduced_minor_keys.begin());
         auto output_key_first =
           thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin());
-        if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
           reduced_size +=
             thrust::distance(output_key_first + reduced_size,
                              thrust::get<0>(thrust::reduce_by_key(
@@ -626,7 +627,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
       auto const minor_comm_size = minor_comm.get_size();
 
       rmm::device_uvector<size_t> d_tx_value_counts(0, handle.get_stream());
-      if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
         auto triplet_first =
           thrust::make_zip_iterator(tmp_majors.begin(),
                                     tmp_minor_keys.begin(),
@@ -782,7 +783,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
         tmp_minor_keys.resize(0, handle.get_stream());
         tmp_minor_keys.shrink_to_fit(handle.get_stream());
 
-        if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
           std::tie(rx_key_aggregated_edge_values, std::ignore) =
             shuffle_values(minor_comm,
                            detail::get_optional_dataframe_buffer_begin<edge_value_t>(
@@ -795,7 +796,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
         detail::shrink_to_fit_optional_dataframe_buffer<optional_edge_value_buffer_value_type>(
           tmp_key_aggregated_edge_values, handle.get_stream());
       } else {
-        if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+        if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
           auto triplet_first =
             thrust::make_zip_iterator(tmp_majors.begin(),
                                       tmp_minor_keys.begin(),
@@ -820,7 +821,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
       }
 
       auto key_pair_first = thrust::make_zip_iterator(rx_majors.begin(), rx_minor_keys.begin());
-      if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+      if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
         if (rx_majors.size() >
             mem_frugal_threshold) {  // trade-off parallelism to lower peak memory
           auto second_first =
@@ -956,15 +957,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
 
     auto major_value_map_device_view =
       (GraphViewType::is_multi_gpu && edge_src_value_input.keys())
-        ? thrust::make_optional<detail::kv_binary_search_store_device_view_t<
+        ? cuda::std::make_optional<detail::kv_binary_search_store_device_view_t<
             decltype(multi_gpu_major_value_map_ptr->view())>>(multi_gpu_major_value_map_ptr->view())
-        : thrust::nullopt;
+        : cuda::std::nullopt;
     std::conditional_t<KVStoreViewType::binary_search,
                        detail::kv_binary_search_store_device_view_t<KVStoreViewType>,
                        detail::kv_cuco_store_find_device_view_t<KVStoreViewType>>
       dst_key_value_map_device_view(
         GraphViewType::is_multi_gpu ? multi_gpu_minor_key_value_map_ptr->view() : kv_store_view);
-    if constexpr (!std::is_same_v<edge_value_t, thrust::nullopt_t>) {
+    if constexpr (!std::is_same_v<edge_value_t, cuda::std::nullopt_t>) {
       auto triplet_first = thrust::make_zip_iterator(
         tmp_majors.begin(),
         tmp_minor_keys.begin(),
diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh
index aaa2703f1ae..d5d64f708ba 100644
--- a/cpp/src/prims/transform_e.cuh
+++ b/cpp/src/prims/transform_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/count.h>
 #include <thrust/distance.h>
@@ -273,21 +274,21 @@ void transform_e(raft::handle_t const& handle,
   using edge_t   = typename GraphViewType::edge_type;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -306,10 +307,10 @@ void transform_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     edge_partition_src_input_device_view_t edge_partition_src_value_input{};
     edge_partition_dst_input_device_view_t edge_partition_dst_value_input{};
@@ -464,21 +465,21 @@ void transform_e(raft::handle_t const& handle,
     std::is_same_v<typename EdgeBucketType::key_type, thrust::tuple<vertex_t, vertex_t>>);
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -541,10 +542,10 @@ void transform_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     if (do_expensive_check) {
       CUGRAPH_EXPECTS(
diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
index c938b10fbbb..eef34938c57 100644
--- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
+++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,6 +32,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -43,7 +44,6 @@
 #include <thrust/iterator/iterator_traits.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/merge.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -249,14 +249,14 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v(
   using weight_t = float;  // dummy
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh
index 43722550c58..1e45fea0608 100644
--- a/cpp/src/prims/transform_reduce_e.cuh
+++ b/cpp/src/prims/transform_reduce_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,11 +35,11 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/execution_policy.h>
 #include <thrust/fill.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/transform_reduce.h>
 #include <thrust/tuple.h>
@@ -68,7 +68,7 @@ __global__ static void transform_reduce_e_hypersparse(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultIterator result_iter /* size 1 */,
   EdgeOp e_op)
 {
@@ -162,7 +162,7 @@ __global__ static void transform_reduce_e_low_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultIterator result_iter /* size 1 */,
   EdgeOp e_op)
 {
@@ -251,7 +251,7 @@ __global__ static void transform_reduce_e_mid_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultIterator result_iter /* size 1 */,
   EdgeOp e_op)
 {
@@ -329,7 +329,7 @@ __global__ static void transform_reduce_e_high_degree(
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
-  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
+  cuda::std::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultIterator result_iter /* size 1 */,
   EdgeOp e_op)
 {
@@ -445,21 +445,21 @@ T transform_reduce_e(raft::handle_t const& handle,
   using edge_t   = typename GraphViewType::edge_type;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -486,10 +486,10 @@ T transform_reduce_e(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     edge_partition_src_input_device_view_t edge_partition_src_value_input{};
     edge_partition_dst_input_device_view_t edge_partition_dst_value_input{};
diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh
index 3abce6f8bd5..8786336bd10 100644
--- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh
+++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,10 +31,10 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/sort.h>
 #include <thrust/tuple.h>
@@ -108,7 +108,7 @@ __global__ static void transform_reduce_by_src_dst_key_hypersparse(
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
   EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input,
   EdgePartitionEdgeMaskWrapper edge_partition_e_mask,
-  thrust::optional<raft::device_span<typename GraphViewType::edge_type const>>
+  cuda::std::optional<raft::device_span<typename GraphViewType::edge_type const>>
     edge_offsets_with_mask,
   EdgeOp e_op,
   typename GraphViewType::vertex_type* keys,
@@ -198,7 +198,7 @@ __global__ static void transform_reduce_by_src_dst_key_low_degree(
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
   EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input,
   EdgePartitionEdgeMaskWrapper edge_partition_e_mask,
-  thrust::optional<raft::device_span<typename GraphViewType::edge_type const>>
+  cuda::std::optional<raft::device_span<typename GraphViewType::edge_type const>>
     edge_offsets_with_mask,
   EdgeOp e_op,
   typename GraphViewType::vertex_type* keys,
@@ -284,7 +284,7 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree(
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
   EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input,
   EdgePartitionEdgeMaskWrapper edge_partition_e_mask,
-  thrust::optional<raft::device_span<typename GraphViewType::edge_type const>>
+  cuda::std::optional<raft::device_span<typename GraphViewType::edge_type const>>
     edge_offsets_with_mask,
   EdgeOp e_op,
   typename GraphViewType::vertex_type* keys,
@@ -383,7 +383,7 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree(
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
   EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input,
   EdgePartitionEdgeMaskWrapper edge_partition_e_mask,
-  thrust::optional<raft::device_span<typename GraphViewType::edge_type const>>
+  cuda::std::optional<raft::device_span<typename GraphViewType::edge_type const>>
     edge_offsets_with_mask,
   EdgeOp e_op,
   typename GraphViewType::vertex_type* keys,
@@ -520,21 +520,21 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
   using edge_t   = typename GraphViewType::edge_type;
 
   using edge_partition_src_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeSrcValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeSrcValueInputWrapper::value_iterator,
       typename EdgeSrcValueInputWrapper::value_type>>;
   using edge_partition_dst_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeDstValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_endpoint_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_endpoint_property_device_view_t<
       vertex_t,
       typename EdgeDstValueInputWrapper::value_iterator,
       typename EdgeDstValueInputWrapper::value_type>>;
   using edge_partition_e_input_device_view_t = std::conditional_t<
-    std::is_same_v<typename EdgeValueInputWrapper::value_type, thrust::nullopt_t>,
+    std::is_same_v<typename EdgeValueInputWrapper::value_type, cuda::std::nullopt_t>,
     detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
     detail::edge_partition_edge_property_device_view_t<
       edge_t,
@@ -556,10 +556,10 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     rmm::device_uvector<vertex_t> tmp_keys(0, handle.get_stream());
     std::optional<rmm::device_uvector<edge_t>> edge_offsets_with_mask{std::nullopt};
@@ -627,9 +627,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
               edge_partition_src_dst_key_input,
               edge_partition_e_mask,
               edge_offsets_with_mask
-                ? thrust::make_optional<raft::device_span<edge_t const>>(
+                ? cuda::std::make_optional<raft::device_span<edge_t const>>(
                     (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size())
-                : thrust::nullopt,
+                : cuda::std::nullopt,
               e_op,
               tmp_keys.data(),
               get_dataframe_buffer_begin(tmp_value_buffer));
@@ -650,9 +650,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
               edge_partition_src_dst_key_input,
               edge_partition_e_mask,
               edge_offsets_with_mask
-                ? thrust::make_optional<raft::device_span<edge_t const>>(
+                ? cuda::std::make_optional<raft::device_span<edge_t const>>(
                     (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size())
-                : thrust::nullopt,
+                : cuda::std::nullopt,
               e_op,
               tmp_keys.data(),
               get_dataframe_buffer_begin(tmp_value_buffer));
@@ -673,9 +673,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
               edge_partition_src_dst_key_input,
               edge_partition_e_mask,
               edge_offsets_with_mask
-                ? thrust::make_optional<raft::device_span<edge_t const>>(
+                ? cuda::std::make_optional<raft::device_span<edge_t const>>(
                     (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size())
-                : thrust::nullopt,
+                : cuda::std::nullopt,
               e_op,
               tmp_keys.data(),
               get_dataframe_buffer_begin(tmp_value_buffer));
@@ -695,9 +695,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
               edge_partition_src_dst_key_input,
               edge_partition_e_mask,
               edge_offsets_with_mask
-                ? thrust::make_optional<raft::device_span<edge_t const>>(
+                ? cuda::std::make_optional<raft::device_span<edge_t const>>(
                     (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size())
-                : thrust::nullopt,
+                : cuda::std::nullopt,
               e_op,
               tmp_keys.data(),
               get_dataframe_buffer_begin(tmp_value_buffer));
@@ -719,9 +719,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
             edge_partition_src_dst_key_input,
             edge_partition_e_mask,
             edge_offsets_with_mask
-              ? thrust::make_optional<raft::device_span<edge_t const>>(
+              ? cuda::std::make_optional<raft::device_span<edge_t const>>(
                   (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size())
-              : thrust::nullopt,
+              : cuda::std::nullopt,
             e_op,
             tmp_keys.data(),
             get_dataframe_buffer_begin(tmp_value_buffer));
diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
index 87f590f571f..884079d103d 100644
--- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
+++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -52,7 +53,6 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/iterator_traits.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/remove.h>
 #include <thrust/sort.h>
@@ -86,7 +86,7 @@ template <typename key_t,
 struct transform_reduce_v_frontier_call_e_op_t {
   EdgeOp e_op{};
 
-  __device__ thrust::optional<
+  __device__ cuda::std::optional<
     std::conditional_t<!std::is_same_v<key_t, void> && !std::is_same_v<payload_t, void>,
                        thrust::tuple<key_t, payload_t>,
                        std::conditional_t<!std::is_same_v<key_t, void>, key_t, payload_t>>>
@@ -106,7 +106,7 @@ struct transform_reduce_v_frontier_call_e_op_t {
                                   thrust::get<1>(*e_op_result));
       }
     } else {
-      return thrust::nullopt;
+      return cuda::std::nullopt;
     }
   }
 };
@@ -121,7 +121,7 @@ struct update_keep_flag_t {
   raft::device_span<uint32_t> keep_flags{};
   key_t v_range_first{};
   InputKeyIterator input_key_first{};
-  thrust::optional<input_key_t> invalid_input_key{};
+  cuda::std::optional<input_key_t> invalid_input_key{};
 
   __device__ void operator()(size_t i) const
   {
@@ -1058,10 +1058,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle,
         graph_view.local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
 
     if constexpr (GraphViewType::is_multi_gpu) {
       auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
@@ -1110,9 +1110,9 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle,
  * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor
  * outputs by (tagged-)destination ID.
  *
- * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are
- * assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag
- * type (KeyBucketType::key_type is identical to a vertex type otherwise).
+ * Edge functor outputs are cuda::std::optional objects and are invalid if cuda::std::nullopt.
+ * Vertices are assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a
+ * tag type (KeyBucketType::key_type is identical to a vertex type otherwise).
  *
  * @tparam GraphViewType Type of the passed non-owning graph object.
  * @tparam KeyBucketType Type of the vertex frontier bucket class which abstracts the
@@ -1141,10 +1141,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle,
  * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not
  * access edge property values).
  * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for
- * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be
- * discarded); 2) dummy (but valid) thrust::optional object (e.g.
- * thrust::optional<std::byte>{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is
- * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be
+ * the source, destination, and edge and returns 1) cuda::std::nullopt (if invalid and to be
+ * discarded); 2) dummy (but valid) cuda::std::optional object (e.g.
+ * cuda::std::optional<std::byte>{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type
+ * is void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be
  * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void);
  * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type
  * is not void).
diff --git a/cpp/src/prims/update_v_frontier.cuh b/cpp/src/prims/update_v_frontier.cuh
index a9b0a6b823b..0516ebc7d31 100644
--- a/cpp/src/prims/update_v_frontier.cuh
+++ b/cpp/src/prims/update_v_frontier.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -157,10 +157,10 @@ struct check_invalid_bucket_idx_t {
  * graph_view.local_vertex_partition_range_size().
  * @param v_op Ternary operator that takes (tagged-)vertex ID, *(@p vertex_value_input_first + i)
  * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and the payload value for the
- * (tagged-)vertex ID and returns a tuple of 1) a thrust::optional object optionally storing a
- * bucket index and 2) a thrust::optional object optionally storing a new vertex property value. If
- * the first element of the returned tuple is thrust::nullopt, this (tagged-)vertex won't be
- * inserted to the vertex frontier. If the second element is thrust::nullopt, the vertex property
+ * (tagged-)vertex ID and returns a tuple of 1) a cuda::std::optional object optionally storing a
+ * bucket index and 2) a cuda::std::optional object optionally storing a new vertex property value.
+ * If the first element of the returned tuple is cuda::std::nullopt, this (tagged-)vertex won't be
+ * inserted to the vertex frontier. If the second element is cuda::std::nullopt, the vertex property
  * value for this vertex won't be updated. Note that it is currently undefined behavior if there are
  * multiple tagged-vertices with the same vertex ID (but with different tags) AND @p v_op results on
  * the tagged-vertices with the same vertex ID have more than one valid new vertex property values.
@@ -286,13 +286,13 @@ void update_v_frontier(raft::handle_t const& handle,
  * graph_view.local_vertex_partition_range_size().
  * @param v_op Binary operator that takes (tagged-)vertex ID, and *(@p vertex_value_input_first + i)
  * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and returns a tuple of 1) a
- * thrust::optional object optionally storing a bucket index and 2) a thrust::optional object
+ * cuda::std::optional object optionally storing a bucket index and 2) a cuda::std::optional object
  * optionally storing a new vertex property value. If the first element of the returned tuple is
- * thrust::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second
- * element is thrust::nullopt, the vertex property value for this vertex won't be updated. Note that
- * it is currently undefined behavior if there are multiple tagged-vertices with the same vertex ID
- * (but with different tags) AND @p v_op results on the tagged-vertices with the same vertex ID have
- * more than one valid new vertex property values.
+ * cuda::std::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second
+ * element is cuda::std::nullopt, the vertex property value for this vertex won't be updated. Note
+ * that it is currently undefined behavior if there are multiple tagged-vertices with the same
+ * vertex ID (but with different tags) AND @p v_op results on the tagged-vertices with the same
+ * vertex ID have more than one valid new vertex property value.
  */
 template <typename GraphViewType,
           typename KeyBuffer,
diff --git a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh
index d2047883349..19d6688f1ed 100644
--- a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh
+++ b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
+#include <cuda/std/optional>
 #include <thrust/tuple.h>
 
 namespace cugraph {
@@ -40,8 +40,8 @@ struct return_edges_with_properties_e_op {
   template <typename key_t, typename vertex_t, typename EdgeProperties>
   auto __host__ __device__ operator()(key_t optionally_tagged_src,
                                       vertex_t dst,
-                                      thrust::nullopt_t,
-                                      thrust::nullopt_t,
+                                      cuda::std::nullopt_t,
+                                      cuda::std::nullopt_t,
                                       EdgeProperties edge_properties) const
   {
     static_assert(std::is_same_v<key_t, vertex_t> ||
@@ -51,43 +51,43 @@ struct return_edges_with_properties_e_op {
     if constexpr (std::is_same_v<key_t, vertex_t>) {
       vertex_t src{optionally_tagged_src};
 
-      if constexpr (std::is_same_v<EdgeProperties, thrust::nullopt_t>) {
-        return thrust::make_optional(thrust::make_tuple(src, dst));
+      if constexpr (std::is_same_v<EdgeProperties, cuda::std::nullopt_t>) {
+        return cuda::std::make_optional(thrust::make_tuple(src, dst));
       } else if constexpr (std::is_arithmetic<EdgeProperties>::value) {
-        return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties));
+        return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties));
       } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic<EdgeProperties>::value &&
                            (thrust::tuple_size<EdgeProperties>::value == 2)) {
-        return thrust::make_optional(thrust::make_tuple(
+        return cuda::std::make_optional(thrust::make_tuple(
           src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties)));
       } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic<EdgeProperties>::value &&
                            (thrust::tuple_size<EdgeProperties>::value == 3)) {
-        return thrust::make_optional(thrust::make_tuple(src,
-                                                        dst,
-                                                        thrust::get<0>(edge_properties),
-                                                        thrust::get<1>(edge_properties),
-                                                        thrust::get<2>(edge_properties)));
+        return cuda::std::make_optional(thrust::make_tuple(src,
+                                                           dst,
+                                                           thrust::get<0>(edge_properties),
+                                                           thrust::get<1>(edge_properties),
+                                                           thrust::get<2>(edge_properties)));
       }
     } else if constexpr (std::is_same_v<key_t, thrust::tuple<vertex_t, int32_t>>) {
       vertex_t src{thrust::get<0>(optionally_tagged_src)};
       int32_t label{thrust::get<1>(optionally_tagged_src)};
 
       src = thrust::get<0>(optionally_tagged_src);
-      if constexpr (std::is_same_v<EdgeProperties, thrust::nullopt_t>) {
-        return thrust::make_optional(thrust::make_tuple(src, dst, label));
+      if constexpr (std::is_same_v<EdgeProperties, cuda::std::nullopt_t>) {
+        return cuda::std::make_optional(thrust::make_tuple(src, dst, label));
       } else if constexpr (std::is_arithmetic<EdgeProperties>::value) {
-        return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties, label));
+        return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties, label));
       } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic<EdgeProperties>::value &&
                            (thrust::tuple_size<EdgeProperties>::value == 2)) {
-        return thrust::make_optional(thrust::make_tuple(
+        return cuda::std::make_optional(thrust::make_tuple(
           src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties), label));
       } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic<EdgeProperties>::value &&
                            (thrust::tuple_size<EdgeProperties>::value == 3)) {
-        return thrust::make_optional(thrust::make_tuple(src,
-                                                        dst,
-                                                        thrust::get<0>(edge_properties),
-                                                        thrust::get<1>(edge_properties),
-                                                        thrust::get<2>(edge_properties),
-                                                        label));
+        return cuda::std::make_optional(thrust::make_tuple(src,
+                                                           dst,
+                                                           thrust::get<0>(edge_properties),
+                                                           thrust::get<1>(edge_properties),
+                                                           thrust::get<2>(edge_properties),
+                                                           label));
       }
     }
   }
diff --git a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh
index 5c04d628f09..2a6136fb96a 100644
--- a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh
+++ b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,11 +29,12 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
 #include <thrust/sort.h>
 #include <thrust/tuple.h>
 #include <thrust/unique.h>
 
+#include <optional>
+
 namespace cugraph {
 namespace detail {
 
diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh
index f6793c4a157..9ce8edbb9c1 100644
--- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh
+++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,10 +20,11 @@
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/binary_search.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/tuple.h>
 
+#include <optional>
+
 namespace cugraph {
 namespace detail {
 
diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu
index 89634253ee7..35c2fc5abb1 100644
--- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu
+++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,10 +21,11 @@
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/binary_search.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/tuple.h>
 
+#include <optional>
+
 namespace cugraph {
 namespace detail {
 
diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu
index 41cb7413bc4..b627431d53f 100644
--- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu
+++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,10 +21,11 @@
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/binary_search.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/tuple.h>
 
+#include <optional>
+
 namespace cugraph {
 namespace detail {
 
diff --git a/cpp/src/sampling/detail/sample_edges.cuh b/cpp/src/sampling/detail/sample_edges.cuh
index 0c670c6507e..a4e228522aa 100644
--- a/cpp/src/sampling/detail/sample_edges.cuh
+++ b/cpp/src/sampling/detail/sample_edges.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
+#include <cuda/std/optional>
 #include <thrust/sort.h>
 #include <thrust/tuple.h>
 
@@ -41,12 +41,12 @@ struct sample_edges_op_t {
   template <typename EdgeProperties>
   auto __host__ __device__ operator()(vertex_t src,
                                       vertex_t dst,
-                                      thrust::nullopt_t,
-                                      thrust::nullopt_t,
+                                      cuda::std::nullopt_t,
+                                      cuda::std::nullopt_t,
                                       EdgeProperties edge_properties) const
   {
     // FIXME: A solution using thrust_tuple_cat would be more flexible here
-    if constexpr (std::is_same_v<EdgeProperties, thrust::nullopt_t>) {
+    if constexpr (std::is_same_v<EdgeProperties, cuda::std::nullopt_t>) {
       return thrust::make_tuple(src, dst);
     } else if constexpr (std::is_arithmetic<EdgeProperties>::value) {
       return thrust::make_tuple(src, dst, edge_properties);
@@ -68,7 +68,7 @@ struct sample_edges_op_t {
 template <typename vertex_t, typename bias_t>
 struct sample_edge_biases_op_t {
   auto __host__ __device__
-  operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const
+  operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const
   {
     return bias;
   }
diff --git a/cpp/src/sampling/detail/sampling_utils.hpp b/cpp/src/sampling/detail/sampling_utils.hpp
index 17eb8dd0873..71387eb0e63 100644
--- a/cpp/src/sampling/detail/sampling_utils.hpp
+++ b/cpp/src/sampling/detail/sampling_utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
+#include <optional>
 
 namespace cugraph {
 namespace detail {
diff --git a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh
index 391dd99b1df..ce4888e3359 100644
--- a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh
+++ b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,10 +32,11 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
 #include <thrust/sort.h>
 #include <thrust/tuple.h>
 
+#include <optional>
+
 namespace cugraph {
 namespace detail {
 
diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp
index bbc0fbc17af..b759e479bc6 100644
--- a/cpp/src/sampling/neighbor_sampling_impl.hpp
+++ b/cpp/src/sampling/neighbor_sampling_impl.hpp
@@ -31,6 +31,7 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/unique.h>
 
 namespace cugraph {
@@ -123,9 +124,9 @@ neighbor_sample_impl(raft::handle_t const& handle,
         *edge_type_view,
         [valid_edge_type = i] __device__(auto src,
                                          auto dst,
-                                         thrust::nullopt_t,
-                                         thrust::nullopt_t,
-                                         /*thrust::nullopt_t*/ auto edge_type) {
+                                         cuda::std::nullopt_t,
+                                         cuda::std::nullopt_t,
+                                         /*cuda::std::nullopt_t*/ auto edge_type) {
           return edge_type == valid_edge_type;
         },
         edge_mask.mutable_view(),
diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh
index 0b1d9dcdb56..440c0c7a6ec 100644
--- a/cpp/src/sampling/random_walks.cuh
+++ b/cpp/src/sampling/random_walks.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
 #include <rmm/device_uvector.hpp>
 
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -43,7 +44,6 @@
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/logical.h>
-#include <thrust/optional.h>
 #include <thrust/remove.h>
 #include <thrust/scan.h>
 #include <thrust/scatter.h>
@@ -216,8 +216,8 @@ struct col_indx_extract_t {
        ptr_d_coalesced_v = original::raw_const_ptr(d_coalesced_src_v),
        row_offsets       = row_offsets_,
        col_indices       = col_indices_,
-       values            = values_ ? thrust::optional<weight_t const*>{*values_}
-                                   : thrust::nullopt] __device__(auto indx, auto col_indx) {
+       values            = values_ ? cuda::std::optional<weight_t const*>{*values_}
+                                   : cuda::std::nullopt] __device__(auto indx, auto col_indx) {
         auto delta     = ptr_d_sizes[indx] - 1;
         auto v_indx    = ptr_d_coalesced_v[indx * max_depth + delta];
         auto start_row = row_offsets[v_indx];
diff --git a/cpp/src/sampling/random_walks_impl.cuh b/cpp/src/sampling/random_walks_impl.cuh
index 6c10fc473f3..fbf0836dac5 100644
--- a/cpp/src/sampling/random_walks_impl.cuh
+++ b/cpp/src/sampling/random_walks_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@
 
 #include <rmm/device_uvector.hpp>
 
-#include <thrust/optional.h>
+#include <cuda/std/optional>
 
 #include <algorithm>
 #include <limits>
@@ -54,14 +54,14 @@ template <typename vertex_t, typename weight_t>
 struct sample_edges_op_t {
   template <typename W = weight_t>
   __device__ std::enable_if_t<std::is_same_v<W, void>, vertex_t> operator()(
-    vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+    vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const
   {
     return dst;
   }
 
   template <typename W = weight_t>
   __device__ std::enable_if_t<!std::is_same_v<W, void>, thrust::tuple<vertex_t, W>> operator()(
-    vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, W w) const
+    vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, W w) const
   {
     return thrust::make_tuple(dst, w);
   }
@@ -69,8 +69,8 @@ struct sample_edges_op_t {
 
 template <typename vertex_t, typename bias_t>
 struct biased_random_walk_e_bias_op_t {
-  __device__ bias_t
-  operator()(vertex_t, vertex_t, bias_t src_out_weight_sum, thrust::nullopt_t, bias_t weight) const
+  __device__ bias_t operator()(
+    vertex_t, vertex_t, bias_t src_out_weight_sum, cuda::std::nullopt_t, bias_t weight) const
   {
     return weight / src_out_weight_sum;
   }
@@ -79,7 +79,7 @@ struct biased_random_walk_e_bias_op_t {
 template <typename vertex_t, typename weight_t>
 struct biased_sample_edges_op_t {
   __device__ thrust::tuple<vertex_t, weight_t> operator()(
-    vertex_t, vertex_t dst, weight_t, thrust::nullopt_t, weight_t weight) const
+    vertex_t, vertex_t dst, weight_t, cuda::std::nullopt_t, weight_t weight) const
   {
     return thrust::make_tuple(dst, weight);
   }
@@ -99,9 +99,9 @@ struct node2vec_random_walk_e_bias_op_t {
   __device__ std::enable_if_t<std::is_same_v<W, void>, bias_t> operator()(
     thrust::tuple<vertex_t, vertex_t> tagged_src,
     vertex_t dst,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
-    thrust::nullopt_t) const
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t) const
   {
     //  Check tag (prev vert) for destination
     if (dst == thrust::get<1>(tagged_src)) { return 1.0 / p_; }
@@ -126,8 +126,8 @@ struct node2vec_random_walk_e_bias_op_t {
   __device__ std::enable_if_t<!std::is_same_v<W, void>, bias_t> operator()(
     thrust::tuple<vertex_t, vertex_t> tagged_src,
     vertex_t dst,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
     W) const
   {
     //  Check tag (prev vert) for destination
@@ -155,9 +155,9 @@ struct node2vec_sample_edges_op_t {
   __device__ std::enable_if_t<std::is_same_v<W, void>, vertex_t> operator()(
     thrust::tuple<vertex_t, vertex_t> tagged_src,
     vertex_t dst,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
-    thrust::nullopt_t) const
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t) const
   {
     return dst;
   }
@@ -166,8 +166,8 @@ struct node2vec_sample_edges_op_t {
   __device__ std::enable_if_t<!std::is_same_v<W, void>, thrust::tuple<vertex_t, W>> operator()(
     thrust::tuple<vertex_t, vertex_t> tagged_src,
     vertex_t dst,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
     W w) const
   {
     return thrust::make_tuple(dst, w);
diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp
index 2c5658b32a5..d371b4141e0 100644
--- a/cpp/src/sampling/rw_traversals.hpp
+++ b/cpp/src/sampling/rw_traversals.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,12 +27,12 @@
 #include <rmm/device_uvector.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/for_each.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/reduce.h>
 #include <thrust/tuple.h>
 
@@ -138,7 +138,7 @@ struct uniform_selector_t {
     {
     }
 
-    __device__ thrust::optional<thrust::tuple<vertex_t, weight_t>> operator()(
+    __device__ cuda::std::optional<thrust::tuple<vertex_t, weight_t>> operator()(
       vertex_t src_v,
       real_t rnd_val,
       vertex_t = 0 /* not used*/,
@@ -146,7 +146,7 @@ struct uniform_selector_t {
       bool     = false /* not used*/) const
     {
       auto crt_out_deg = ptr_d_cache_out_degs_[src_v];
-      if (crt_out_deg == 0) return thrust::nullopt;  // src_v is a sink
+      if (crt_out_deg == 0) return cuda::std::nullopt;  // src_v is a sink
 
       vertex_t v_indx =
         static_cast<vertex_t>(rnd_val >= 1.0 ? crt_out_deg - 1 : rnd_val * crt_out_deg);
@@ -156,7 +156,8 @@ struct uniform_selector_t {
       auto weight_value =
         (values_ == nullptr ? weight_t{1}
                             : values_[start_row + col_indx]);  // account for un-weighted graphs
-      return thrust::optional{thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)};
+      return cuda::std::optional{
+        thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)};
     }
 
    private:
@@ -211,7 +212,7 @@ struct biased_selector_t {
     // Sum(weights(neighborhood(src_v))) are pre-computed and
     // stored in ptr_d_sum_weights_ (too expensive to check, here);
     //
-    __device__ thrust::optional<thrust::tuple<vertex_t, weight_t>> operator()(
+    __device__ cuda::std::optional<thrust::tuple<vertex_t, weight_t>> operator()(
       vertex_t src_v,
       real_t rnd_val,
       vertex_t = 0 /* not used*/,
@@ -223,7 +224,7 @@ struct biased_selector_t {
 
       auto col_indx_begin = row_offsets_[src_v];
       auto col_indx_end   = row_offsets_[src_v + 1];
-      if (col_indx_begin == col_indx_end) return thrust::nullopt;  // src_v is a sink
+      if (col_indx_begin == col_indx_end) return cuda::std::nullopt;  // src_v is a sink
 
       auto col_indx      = col_indx_begin;
       auto prev_col_indx = col_indx;
@@ -234,7 +235,7 @@ struct biased_selector_t {
         run_sum_w += values_[col_indx];
         prev_col_indx = col_indx;
       }
-      return thrust::optional{
+      return cuda::std::optional{
         thrust::make_tuple(col_indices_[prev_col_indx], values_[prev_col_indx])};
     }
 
@@ -293,9 +294,9 @@ struct node2vec_selector_t {
         q_(q),
         coalesced_alpha_{
           (max_degree > 0) && (num_paths > 0) && (ptr_alpha != nullptr)
-            ? thrust::optional<thrust::tuple<vertex_t, edge_t, weight_t*>>{thrust::make_tuple(
+            ? cuda::std::optional<thrust::tuple<vertex_t, edge_t, weight_t*>>{thrust::make_tuple(
                 max_degree, num_paths, ptr_alpha)}
-            : thrust::nullopt}
+            : cuda::std::nullopt}
     {
     }
 
@@ -324,7 +325,7 @@ struct node2vec_selector_t {
       }
     }
 
-    __device__ thrust::optional<thrust::tuple<vertex_t, weight_t>> operator()(
+    __device__ cuda::std::optional<thrust::tuple<vertex_t, weight_t>> operator()(
       vertex_t src_v, real_t rnd_val, vertex_t prev_v, edge_t path_index, bool start_path) const
     {
       auto const offset_indx_begin = row_offsets_[src_v];
@@ -333,7 +334,7 @@ struct node2vec_selector_t {
       weight_t sum_scaled_weights{0};
       auto offset_indx = offset_indx_begin;
 
-      if (offset_indx_begin == offset_indx_end) return thrust::nullopt;  // src_v is a sink
+      if (offset_indx_begin == offset_indx_end) return cuda::std::nullopt;  // src_v is a sink
 
       // for 1st vertex in path just use biased random selection:
       //
@@ -359,7 +360,7 @@ struct node2vec_selector_t {
           run_sum_w += crt_weight;
           prev_offset_indx = offset_indx;
         }
-        return thrust::optional{
+        return cuda::std::optional{
           thrust::make_tuple(col_indices_[prev_offset_indx],
                              values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])};
       }
@@ -402,7 +403,7 @@ struct node2vec_selector_t {
           run_sum_w += ptr_d_scaled_weights[start_alpha_offset + nghbr_indx];
           prev_offset_indx = offset_indx;
         }
-        return thrust::optional{
+        return cuda::std::optional{
           thrust::make_tuple(col_indices_[prev_offset_indx],
                              values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])};
 
@@ -435,7 +436,7 @@ struct node2vec_selector_t {
           run_sum_w += scaled_weight;
           prev_offset_indx = offset_indx;
         }
-        return thrust::optional{
+        return cuda::std::optional{
           thrust::make_tuple(col_indices_[prev_offset_indx],
                              values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])};
       }
@@ -459,7 +460,7 @@ struct node2vec_selector_t {
     // this is information related to a scratchpad buffer, used as cache, hence mutable;
     // (necessary, because get_strategy() is const)
     //
-    mutable thrust::optional<thrust::tuple<vertex_t, edge_t, weight_t*>>
+    mutable cuda::std::optional<thrust::tuple<vertex_t, edge_t, weight_t*>>
       coalesced_alpha_;  // tuple<max_vertex_degree,
                          // num_paths, alpha_buffer[max_vertex_degree*num_paths]>
   };
diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh
index 151350dad6d..ef1a31400f7 100644
--- a/cpp/src/sampling/sampling_post_processing_impl.cuh
+++ b/cpp/src/sampling/sampling_post_processing_impl.cuh
@@ -29,6 +29,7 @@
 
 #include <cub/cub.cuh>
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -51,9 +52,9 @@ namespace {
 
 template <typename vertex_t, typename edge_type_t>
 struct edge_order_t {
-  thrust::optional<raft::device_span<size_t const>> edgelist_label_offsets{thrust::nullopt};
-  thrust::optional<raft::device_span<edge_type_t const>> edgelist_edge_types{thrust::nullopt};
-  thrust::optional<raft::device_span<int32_t const>> edgelist_hops{thrust::nullopt};
+  cuda::std::optional<raft::device_span<size_t const>> edgelist_label_offsets{cuda::std::nullopt};
+  cuda::std::optional<raft::device_span<edge_type_t const>> edgelist_edge_types{cuda::std::nullopt};
+  cuda::std::optional<raft::device_span<int32_t const>> edgelist_hops{cuda::std::nullopt};
   raft::device_span<vertex_t const> edgelist_majors{};
   raft::device_span<vertex_t const> edgelist_minors{};
 
@@ -99,8 +100,8 @@ struct edge_order_t {
 
 template <typename vertex_t>
 struct is_first_triplet_in_run_t {
-  thrust::optional<raft::device_span<size_t const>> edgelist_label_offsets{thrust::nullopt};
-  thrust::optional<raft::device_span<int32_t const>> edgelist_hops{thrust::nullopt};
+  cuda::std::optional<raft::device_span<size_t const>> edgelist_label_offsets{cuda::std::nullopt};
+  cuda::std::optional<raft::device_span<int32_t const>> edgelist_hops{cuda::std::nullopt};
   raft::device_span<vertex_t const> edgelist_majors{};
 
   __device__ bool operator()(size_t i) const
@@ -142,7 +143,7 @@ struct compute_label_index_t {
 
 template <typename label_index_t>
 struct optionally_compute_label_index_t {
-  thrust::optional<raft::device_span<size_t const>> edgelist_label_offsets{thrust::nullopt};
+  cuda::std::optional<raft::device_span<size_t const>> edgelist_label_offsets{cuda::std::nullopt};
 
   __device__ label_index_t operator()(size_t i) const
   {
@@ -2370,9 +2371,9 @@ heterogeneous_renumber_sampled_edgelist(
         cuda::proclaim_return_type<edge_id_t>(
           [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets),
            edge_types             = edgelist_edge_types
-                                      ? thrust::make_optional<raft::device_span<edge_type_t const>>(
+                                      ? cuda::std::make_optional<raft::device_span<edge_type_t const>>(
                               (*edgelist_edge_types).data(), (*edgelist_edge_types).size())
-                                      : thrust::nullopt,
+                                      : cuda::std::nullopt,
            renumber_map =
              raft::device_span<edge_id_t const>(segment_sorted_edge_id_renumber_map.data(),
                                                 segment_sorted_edge_id_renumber_map.size()),
@@ -2499,17 +2500,17 @@ sort_sampled_edge_tuples(raft::handle_t const& handle,
                                         handle.get_stream());
     thrust::sequence(handle.get_thrust_policy(), indices.begin(), indices.end(), size_t{0});
     edge_order_t<vertex_t, edge_type_t> edge_order_comp{
-      edgelist_label_offsets ? thrust::make_optional<raft::device_span<size_t const>>(
+      edgelist_label_offsets ? cuda::std::make_optional<raft::device_span<size_t const>>(
                                  (*edgelist_label_offsets).data() + h_label_offsets[i],
                                  (h_label_offsets[i + 1] - h_label_offsets[i]) + 1)
-                             : thrust::nullopt,
+                             : cuda::std::nullopt,
       edgelist_edge_types && use_edge_type_as_sort_key
-        ? thrust::make_optional<raft::device_span<edge_type_t const>>(
+        ? cuda::std::make_optional<raft::device_span<edge_type_t const>>(
             (*edgelist_edge_types).data() + h_edge_offsets[i], indices.size())
-        : thrust::nullopt,
-      edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+        : cuda::std::nullopt,
+      edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                         (*edgelist_hops).data() + h_edge_offsets[i], indices.size())
-                    : thrust::nullopt,
+                    : cuda::std::nullopt,
       raft::device_span<vertex_t const>(edgelist_majors.data() + h_edge_offsets[i], indices.size()),
       raft::device_span<vertex_t const>(edgelist_minors.data() + h_edge_offsets[i],
                                         indices.size())};
@@ -2686,8 +2687,8 @@ renumber_and_compress_sampled_edgelist(
       auto label_index_first = thrust::make_transform_iterator(
         thrust::make_counting_iterator(size_t{0}),
         optionally_compute_label_index_t<label_index_t>{
-          edgelist_label_offsets ? thrust::make_optional(*edgelist_label_offsets)
-                                 : thrust::nullopt});
+          edgelist_label_offsets ? cuda::std::make_optional(*edgelist_label_offsets)
+                                 : cuda::std::nullopt});
       auto input_key_first = thrust::make_zip_iterator(label_index_first, (*edgelist_hops).begin());
       rmm::device_uvector<label_index_t> unique_key_label_indices(min_vertices.size(),
                                                                   handle.get_stream());
@@ -2781,9 +2782,9 @@ renumber_and_compress_sampled_edgelist(
     thrust::make_counting_iterator(edgelist_majors.size()),
     is_first_triplet_in_run_t<vertex_t>{
       detail::to_thrust_optional(edgelist_label_offsets),
-      edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+      edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                         (*edgelist_hops).data(), (*edgelist_hops).size())
-                    : thrust::nullopt,
+                    : cuda::std::nullopt,
       raft::device_span<vertex_t const>(
         edgelist_majors.data(),
         edgelist_majors.size())});  // number of unique ((label), (hop), major) triplets
@@ -2910,15 +2911,15 @@ renumber_and_compress_sampled_edgelist(
       major_vertex_counts.begin(),
       major_vertex_counts.end(),
       [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets),
-       edgelist_hops          = edgelist_hops ? thrust::make_optional<raft::device_span<int32_t>>(
+       edgelist_hops = edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t>>(
                                          (*edgelist_hops).data(), (*edgelist_hops).size())
-                                              : thrust::nullopt,
+                                     : cuda::std::nullopt,
        edgelist_majors =
          raft::device_span<vertex_t const>(edgelist_majors.data(), edgelist_majors.size()),
        seed_vertices             = renumbered_seed_vertices
-                                     ? thrust::make_optional<raft::device_span<vertex_t const>>(
+                                     ? cuda::std::make_optional<raft::device_span<vertex_t const>>(
                              (*renumbered_seed_vertices).data(), (*renumbered_seed_vertices).size())
-                                     : thrust::nullopt,
+                                     : cuda::std::nullopt,
        seed_vertex_label_offsets = detail::to_thrust_optional(seed_vertex_label_offsets),
        num_hops,
        compress_per_hop] __device__(size_t i) {
@@ -3045,9 +3046,9 @@ renumber_and_compress_sampled_edgelist(
       [major_vertex_counts =
          raft::device_span<vertex_t const>(major_vertex_counts.data(), major_vertex_counts.size()),
        minor_vertex_counts = minor_vertex_counts
-                               ? thrust::make_optional<raft::device_span<vertex_t const>>(
+                               ? cuda::std::make_optional<raft::device_span<vertex_t const>>(
                                    (*minor_vertex_counts).data(), (*minor_vertex_counts).size())
-                               : thrust::nullopt,
+                               : cuda::std::nullopt,
        num_hops,
        compress_per_hop] __device__(size_t i) {
         auto vertex_count = major_vertex_counts[i];
@@ -3310,9 +3311,9 @@ renumber_and_sort_sampled_edgelist(
       (*edgelist_label_hop_offsets).begin(),
       cuda::proclaim_return_type<size_t>(
         [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets),
-         edgelist_hops = edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+         edgelist_hops = edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                                            (*edgelist_hops).data(), (*edgelist_hops).size())
-                                       : thrust::nullopt,
+                                       : cuda::std::nullopt,
          num_hops,
          num_edges = edgelist_majors.size()] __device__(size_t i) {
           size_t start_offset{0};
@@ -3500,12 +3501,12 @@ heterogeneous_renumber_and_sort_sampled_edgelist(
       cuda::proclaim_return_type<size_t>(
         [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets),
          edgelist_edge_types    = edgelist_edge_types
-                                    ? thrust::make_optional<raft::device_span<edge_type_t const>>(
+                                    ? cuda::std::make_optional<raft::device_span<edge_type_t const>>(
                                      (*edgelist_edge_types).data(), (*edgelist_edge_types).size())
-                                    : thrust::nullopt,
-         edgelist_hops = edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+                                    : cuda::std::nullopt,
+         edgelist_hops = edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                                            (*edgelist_hops).data(), (*edgelist_hops).size())
-                                       : thrust::nullopt,
+                                       : cuda::std::nullopt,
          num_edge_types,
          num_hops,
          num_edges = edgelist_majors.size()] __device__(size_t i) {
@@ -3653,9 +3654,9 @@ sort_sampled_edgelist(raft::handle_t const& handle,
       (*edgelist_label_hop_offsets).begin(),
       cuda::proclaim_return_type<size_t>(
         [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets),
-         edgelist_hops = edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+         edgelist_hops = edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                                            (*edgelist_hops).data(), (*edgelist_hops).size())
-                                       : thrust::nullopt,
+                                       : cuda::std::nullopt,
          num_hops,
          num_edges = edgelist_majors.size()] __device__(size_t i) {
           size_t start_offset{0};
diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh
index 31de9b1e5d3..f526a6788e1 100644
--- a/cpp/src/structure/graph_view_impl.cuh
+++ b/cpp/src/structure/graph_view_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,6 +38,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/count.h>
 #include <thrust/extrema.h>
@@ -47,7 +48,6 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
 #include <thrust/transform.h>
@@ -126,8 +126,8 @@ rmm::device_uvector<edge_t> compute_major_degrees(
       partition.vertex_partition_range_first(major_range_vertex_partition_id);
 
     auto offsets = edge_partition_offsets[i];
-    auto masks =
-      edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt;
+    auto masks   = edge_partition_masks ? cuda::std::make_optional((*edge_partition_masks)[i])
+                                        : cuda::std::nullopt;
     auto segment_offset_size_per_partition =
       edge_partition_segment_offsets.size() / static_cast<size_t>(minor_comm_size);
     auto num_local_degrees =
@@ -202,7 +202,8 @@ rmm::device_uvector<edge_t> compute_major_degrees(
     handle.get_thrust_policy(),
     degrees.begin(),
     degrees.end(),
-    [offsets, masks = masks ? thrust::make_optional(*masks) : thrust::nullopt] __device__(auto i) {
+    [offsets,
+     masks = masks ? cuda::std::make_optional(*masks) : cuda::std::nullopt] __device__(auto i) {
       auto local_degree = offsets[i + 1] - offsets[i];
       if (masks) {
         local_degree =
@@ -842,10 +843,10 @@ graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if_t<mul
       edge_partition_device_view_t<vertex_t, edge_t, multi_gpu>(this->local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
     thrust::transform(handle.get_thrust_policy(),
                       sorted_edge_first + edge_partition_offsets[i],
                       sorted_edge_first + edge_partition_offsets[i + 1],
@@ -913,10 +914,10 @@ graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if_t<!mu
     edge_partition_device_view_t<vertex_t, edge_t, multi_gpu>(this->local_edge_partition_view());
   auto edge_partition_e_mask =
     edge_mask_view
-      ? thrust::make_optional<
+      ? cuda::std::make_optional<
           detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
           *edge_mask_view, 0)
-      : thrust::nullopt;
+      : cuda::std::nullopt;
   thrust::transform(
     handle.get_thrust_policy(),
     edge_first,
@@ -987,10 +988,10 @@ graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if_t<mul
       edge_partition_device_view_t<vertex_t, edge_t, multi_gpu>(this->local_edge_partition_view(i));
     auto edge_partition_e_mask =
       edge_mask_view
-        ? thrust::make_optional<
+        ? cuda::std::make_optional<
             detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
             *edge_mask_view, i)
-        : thrust::nullopt;
+        : cuda::std::nullopt;
     thrust::transform(
       handle.get_thrust_policy(),
       sorted_edge_first + edge_partition_offsets[i],
@@ -1058,10 +1059,10 @@ graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if_t<!mu
     edge_partition_device_view_t<vertex_t, edge_t, multi_gpu>(this->local_edge_partition_view());
   auto edge_partition_e_mask =
     edge_mask_view
-      ? thrust::make_optional<
+      ? cuda::std::make_optional<
           detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
           *edge_mask_view, 0)
-      : thrust::nullopt;
+      : cuda::std::nullopt;
   thrust::transform(
     handle.get_thrust_policy(),
     edge_first,
diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh
index 3822055b037..e319dc03bbb 100644
--- a/cpp/src/structure/induced_subgraph_impl.cuh
+++ b/cpp/src/structure/induced_subgraph_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,6 +34,7 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
@@ -44,7 +45,6 @@
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/scan.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
@@ -58,7 +58,7 @@ namespace detail {
 
 template <typename vertex_t, typename weight_t, typename property_t>
 struct induced_subgraph_weighted_edge_op {
-  using return_type = thrust::optional<thrust::tuple<vertex_t, vertex_t, weight_t, size_t>>;
+  using return_type = cuda::std::optional<thrust::tuple<vertex_t, vertex_t, weight_t, size_t>>;
 
   raft::device_span<size_t const> dst_subgraph_offsets;
   raft::device_span<vertex_t const> dst_subgraph_vertices;
@@ -74,15 +74,15 @@ struct induced_subgraph_weighted_edge_op {
                                  dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph],
                                  dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1],
                                  dst)
-             ? thrust::make_optional(
+             ? cuda::std::make_optional(
                  thrust::make_tuple(thrust::get<0>(tagged_src), dst, wgt, subgraph))
-             : thrust::nullopt;
+             : cuda::std::nullopt;
   }
 };
 
 template <typename vertex_t, typename property_t>
 struct induced_subgraph_unweighted_edge_op {
-  using return_type = thrust::optional<thrust::tuple<vertex_t, vertex_t, size_t>>;
+  using return_type = cuda::std::optional<thrust::tuple<vertex_t, vertex_t, size_t>>;
 
   raft::device_span<size_t const> dst_subgraph_offsets;
   raft::device_span<vertex_t const> dst_subgraph_vertices;
@@ -91,15 +91,16 @@ struct induced_subgraph_unweighted_edge_op {
                                     vertex_t dst,
                                     property_t sv,
                                     property_t dv,
-                                    thrust::nullopt_t) const
+                                    cuda::std::nullopt_t) const
   {
     size_t subgraph = thrust::get<1>(tagged_src);
     return thrust::binary_search(thrust::seq,
                                  dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph],
                                  dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1],
                                  dst)
-             ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph))
-             : thrust::nullopt;
+             ? cuda::std::make_optional(
+                 thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph))
+             : cuda::std::nullopt;
   }
 };
 
@@ -203,8 +204,8 @@ extract_induced_subgraphs(
   dst_subgraph_vertices = raft::device_span<vertex_t const>(dst_subgraph_vertices_v.data(),
                                                             dst_subgraph_vertices_v.size());
 
-  // 3. Call extract_transform_v_frontier_outgoing_e with a functor that returns thrust::nullopt if
-  // the destination vertex has a property of 0, return the edge if the destination vertex has a
+  // 3. Call extract_transform_v_frontier_outgoing_e with a functor that returns cuda::std::nullopt
+  // if the destination vertex has a property of 0, or the edge if the destination vertex has a
   // property of 1
   vertex_frontier_t<vertex_t, size_t, multi_gpu, false> vertex_frontier(handle, 1);
 
@@ -233,7 +234,7 @@ extract_induced_subgraphs(
         edge_src_dummy_property_t{}.view(),
         edge_dst_dummy_property_t{}.view(),
         *edge_weight_view,
-        detail::induced_subgraph_weighted_edge_op<vertex_t, weight_t, thrust::nullopt_t>{
+        detail::induced_subgraph_weighted_edge_op<vertex_t, weight_t, cuda::std::nullopt_t>{
           dst_subgraph_offsets, dst_subgraph_vertices},
         do_expensive_check);
 
@@ -253,7 +254,7 @@ extract_induced_subgraphs(
         edge_src_dummy_property_t{}.view(),
         edge_dst_dummy_property_t{}.view(),
         edge_dummy_property_t{}.view(),
-        detail::induced_subgraph_unweighted_edge_op<vertex_t, thrust::nullopt_t>{
+        detail::induced_subgraph_unweighted_edge_op<vertex_t, cuda::std::nullopt_t>{
           dst_subgraph_offsets, dst_subgraph_vertices},
         do_expensive_check);
 
diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh
index ba40db1f085..2c2674f5bbe 100644
--- a/cpp/src/traversal/bfs_impl.cuh
+++ b/cpp/src/traversal/bfs_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/fill.h>
@@ -40,7 +41,6 @@
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/set_operations.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
@@ -78,20 +78,26 @@ struct topdown_e_op_t {
   detail::edge_partition_endpoint_property_device_view_t<vertex_t, uint32_t*, bool> visited_flags{};
   vertex_t dst_first{};
 
-  __device__ thrust::optional<vertex_t> operator()(
-    vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+  __device__ cuda::std::optional<vertex_t> operator()(vertex_t src,
+                                                      vertex_t dst,
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t,
+                                                      cuda::std::nullopt_t) const
   {
     auto dst_offset = dst - dst_first;
     auto old        = prev_visited_flags.get(dst_offset);
     if (!old) { old = visited_flags.atomic_or(dst_offset, true); }
-    return old ? thrust::nullopt : thrust::optional<vertex_t>{src};
+    return old ? cuda::std::nullopt : cuda::std::optional<vertex_t>{src};
   }
 };
 
 template <typename vertex_t>
 struct bottomup_e_op_t {
-  __device__ vertex_t operator()(
-    vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+  __device__ vertex_t operator()(vertex_t src,
+                                 vertex_t dst,
+                                 cuda::std::nullopt_t,
+                                 cuda::std::nullopt_t,
+                                 cuda::std::nullopt_t) const
   {
     return dst;
   }
@@ -103,8 +109,11 @@ struct bottomup_pred_op_t {
     prev_visited_flags{};  // visited in the previous iterations
   vertex_t dst_first{};
 
-  __device__ bool operator()(
-    vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+  __device__ bool operator()(vertex_t src,
+                             vertex_t dst,
+                             cuda::std::nullopt_t,
+                             cuda::std::nullopt_t,
+                             cuda::std::nullopt_t) const
   {
     return prev_visited_flags.get(dst - dst_first);
   }
@@ -260,10 +269,10 @@ void bfs(raft::handle_t const& handle,
       auto edge_mask_view = graph_view.edge_mask_view();
       auto edge_partition_e_mask =
         edge_mask_view
-          ? thrust::make_optional<
+          ? cuda::std::make_optional<
               detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
               *edge_mask_view, partition_idx)
-          : thrust::nullopt;
+          : cuda::std::nullopt;
       auto high_and_mid_degree_segment_size =
         (*segment_offsets)[2];  // compute local degrees for high & mid degree segments only, for
                                 // low & hypersparse segments, use low_degree_threshold *
diff --git a/cpp/src/traversal/k_hop_nbrs_impl.cuh b/cpp/src/traversal/k_hop_nbrs_impl.cuh
index 44fa21a5252..be462720e51 100644
--- a/cpp/src/traversal/k_hop_nbrs_impl.cuh
+++ b/cpp/src/traversal/k_hop_nbrs_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,13 +30,13 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/fill.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <limits>
@@ -48,11 +48,11 @@ namespace {
 
 template <typename vertex_t>
 struct e_op_t {
-  __device__ thrust::optional<size_t> operator()(thrust::tuple<vertex_t, size_t> tagged_src,
-                                                 vertex_t,
-                                                 thrust::nullopt_t,
-                                                 thrust::nullopt_t,
-                                                 thrust::nullopt_t) const
+  __device__ cuda::std::optional<size_t> operator()(thrust::tuple<vertex_t, size_t> tagged_src,
+                                                    vertex_t,
+                                                    cuda::std::nullopt_t,
+                                                    cuda::std::nullopt_t,
+                                                    cuda::std::nullopt_t) const
   {
     return thrust::get<1>(tagged_src);
   }
diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh
index b3cd0d57c67..ffe706ca45d 100644
--- a/cpp/src/traversal/od_shortest_distances_impl.cuh
+++ b/cpp/src/traversal/od_shortest_distances_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,12 +37,12 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/integer_utils.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/fill.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/set_operations.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
@@ -133,11 +133,11 @@ struct e_op_t {
   weight_t cutoff{};
   weight_t invalid_distance{};
 
-  __device__ thrust::optional<thrust::tuple<tag_t, weight_t>> operator()(
+  __device__ cuda::std::optional<thrust::tuple<tag_t, weight_t>> operator()(
     thrust::tuple<vertex_t, tag_t> tagged_src,
     vertex_t dst,
-    thrust::nullopt_t,
-    thrust::nullopt_t,
+    cuda::std::nullopt_t,
+    cuda::std::nullopt_t,
     weight_t w) const
   {
     aggregate_vi_t<vertex_t, tag_t, key_t> aggregator{num_origins};
@@ -150,9 +150,9 @@ struct e_op_t {
     auto dst_val      = key_to_dist_map.find(aggregator(thrust::make_tuple(dst, origin_idx)));
     if (dst_val != invalid_distance) { threshold = dst_val < threshold ? dst_val : threshold; }
     return (new_distance < threshold)
-             ? thrust::optional<thrust::tuple<tag_t, weight_t>>{thrust::make_tuple(origin_idx,
-                                                                                   new_distance)}
-             : thrust::nullopt;
+             ? cuda::std::optional<thrust::tuple<tag_t, weight_t>>{thrust::make_tuple(origin_idx,
+                                                                                      new_distance)}
+             : cuda::std::nullopt;
   }
 };
 
@@ -644,8 +644,8 @@ rmm::device_uvector<weight_t> od_shortest_distances(
         thrust::tuple<vertex_t, od_idx_t>,
         weight_t,
         vertex_t,
-        thrust::nullopt_t,
-        thrust::nullopt_t,
+        cuda::std::nullopt_t,
+        cuda::std::nullopt_t,
         weight_t,
         e_op_t<vertex_t, od_idx_t, key_t, weight_t, GraphViewType::is_multi_gpu>>
         e_op_wrapper{e_op};
diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh
index 3429672b151..8006bbf4063 100644
--- a/cpp/src/traversal/sssp_impl.cuh
+++ b/cpp/src/traversal/sssp_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,11 +32,11 @@
 
 #include <raft/util/cudart_utils.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/fill.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -52,8 +52,8 @@ struct e_op_t {
   weight_t const* distances{};
   weight_t cutoff{};
 
-  __device__ thrust::optional<thrust::tuple<weight_t, vertex_t>> operator()(
-    vertex_t src, vertex_t dst, weight_t src_val, thrust::nullopt_t, weight_t w) const
+  __device__ cuda::std::optional<thrust::tuple<weight_t, vertex_t>> operator()(
+    vertex_t src, vertex_t dst, weight_t src_val, cuda::std::nullopt_t, weight_t w) const
   {
     auto push         = true;
     auto new_distance = src_val + w;
@@ -65,9 +65,9 @@ struct e_op_t {
       threshold         = old_distance < threshold ? old_distance : threshold;
     }
     if (new_distance >= threshold) { push = false; }
-    return push ? thrust::optional<thrust::tuple<weight_t, vertex_t>>{thrust::make_tuple(
+    return push ? cuda::std::optional<thrust::tuple<weight_t, vertex_t>>{thrust::make_tuple(
                     new_distance, src)}
-                : thrust::nullopt;
+                : cuda::std::nullopt;
   }
 };
 
@@ -223,11 +223,11 @@ void sssp(raft::handle_t const& handle,
         auto new_dist = thrust::get<0>(pushed_val);
         auto update   = (new_dist < v_val);
         return thrust::make_tuple(
-          update ? thrust::optional<size_t>{new_dist < near_far_threshold ? bucket_idx_next_near
-                                                                          : bucket_idx_far}
-                 : thrust::nullopt,
-          update ? thrust::optional<thrust::tuple<weight_t, vertex_t>>{pushed_val}
-                 : thrust::nullopt);
+          update ? cuda::std::optional<size_t>{new_dist < near_far_threshold ? bucket_idx_next_near
+                                                                             : bucket_idx_far}
+                 : cuda::std::nullopt,
+          update ? cuda::std::optional<thrust::tuple<weight_t, vertex_t>>{pushed_val}
+                 : cuda::std::nullopt);
       });
 
     vertex_frontier.bucket(bucket_idx_cur_near).clear();
@@ -250,9 +250,9 @@ void sssp(raft::handle_t const& handle,
             auto dist =
               *(distances + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v));
             return dist >= old_near_far_threshold
-                     ? thrust::optional<size_t>{dist < near_far_threshold ? bucket_idx_cur_near
-                                                                          : bucket_idx_far}
-                     : thrust::nullopt;
+                     ? cuda::std::optional<size_t>{dist < near_far_threshold ? bucket_idx_cur_near
+                                                                             : bucket_idx_far}
+                     : cuda::std::nullopt;
           });
         near_size = vertex_frontier.bucket(bucket_idx_cur_near).aggregate_size();
         far_size  = vertex_frontier.bucket(bucket_idx_far).aggregate_size();
diff --git a/cpp/tests/components/mg_vertex_coloring_test.cu b/cpp/tests/components/mg_vertex_coloring_test.cu
index 17327e35c97..89b1df9264e 100644
--- a/cpp/tests/components/mg_vertex_coloring_test.cu
+++ b/cpp/tests/components/mg_vertex_coloring_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,6 +34,8 @@
 
 #include <raft/random/rng_state.hpp>
 
+#include <cuda/std/optional>
+
 #include <gtest/gtest.h>
 
 #include <chrono>
@@ -130,7 +132,7 @@ class Tests_MGGraphColoring
                   : cugraph::detail::edge_minor_property_view_t<vertex_t, vertex_t const*>(
                       d_colors.data(), vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
-        [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) {
+        [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) {
           if ((src != dst) && (src_color == dst_color)) {
             return uint8_t{1};
           } else {
@@ -168,7 +170,7 @@ class Tests_MGGraphColoring
                       d_colors.begin(), vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
         [renumber_map = (*mg_renumber_map).data()] __device__(
-          auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) {
+          auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) {
           if ((src != dst) && (src_color == dst_color)) {
             return vertex_t{1};
           } else {
diff --git a/cpp/tests/components/vertex_coloring_test.cu b/cpp/tests/components/vertex_coloring_test.cu
index fed64f272d7..cf55146c5ba 100644
--- a/cpp/tests/components/vertex_coloring_test.cu
+++ b/cpp/tests/components/vertex_coloring_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,8 @@
 
 #include <raft/random/rng_state.hpp>
 
+#include <cuda/std/optional>
+
 #include <gtest/gtest.h>
 
 #include <chrono>
@@ -107,7 +109,7 @@ class Tests_SGGraphColoring
         cugraph::detail::edge_minor_property_view_t<vertex_t, vertex_t const*>(d_colors.data(),
                                                                                vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
-        [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) {
+        [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) {
           if ((src != dst) && (src_color == dst_color)) {
             return uint8_t{1};
           } else {
@@ -142,7 +144,7 @@ class Tests_SGGraphColoring
                                                                                vertex_t{0}),
         cugraph::edge_dummy_property_t{}.view(),
         [renumber_map = (*sg_renumber_map).data()] __device__(
-          auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) {
+          auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) {
           if ((src != dst) && (src_color == dst_color)) {
             return vertex_t{1};
           } else {
diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu
index 63a785fb182..8796383f45d 100644
--- a/cpp/tests/prims/mg_count_if_e.cu
+++ b/cpp/tests/prims/mg_count_if_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,11 +37,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -127,15 +127,16 @@ class Tests_MGCountIfE
       hr_timer.start("MG count_if_e");
     }
 
-    auto result = count_if_e(
-      *handle_,
-      mg_graph_view,
-      mg_src_prop.view(),
-      mg_dst_prop.view(),
-      cugraph::edge_dummy_property_t{}.view(),
-      [] __device__(auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) {
-        return src_property < dst_property;
-      });
+    auto result =
+      count_if_e(*handle_,
+                 mg_graph_view,
+                 mg_src_prop.view(),
+                 mg_dst_prop.view(),
+                 cugraph::edge_dummy_property_t{}.view(),
+                 [] __device__(
+                   auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) {
+                   return src_property < dst_property;
+                 });
 
     if (cugraph::test::g_perf) {
       RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
@@ -179,7 +180,7 @@ class Tests_MGCountIfE
           sg_dst_prop.view(),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) {
+            auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) {
             return src_property < dst_property;
           });
         ASSERT_TRUE(expected_result == result);
diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu
index d3d6524cbdb..27e3f471c5b 100644
--- a/cpp/tests/prims/mg_extract_transform_e.cu
+++ b/cpp/tests/prims/mg_extract_transform_e.cu
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,11 +40,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/equal.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -65,12 +65,12 @@ struct e_op_t {
                 std::is_same_v<output_payload_t, thrust::tuple<float, int32_t>>);
 
   using return_type =
-    thrust::optional<std::conditional_t<std::is_arithmetic_v<output_payload_t>,
-                                        thrust::tuple<vertex_t, vertex_t, int32_t>,
-                                        thrust::tuple<vertex_t, vertex_t, float, int32_t>>>;
+    cuda::std::optional<std::conditional_t<std::is_arithmetic_v<output_payload_t>,
+                                           thrust::tuple<vertex_t, vertex_t, int32_t>,
+                                           thrust::tuple<vertex_t, vertex_t, float, int32_t>>>;
 
   __device__ return_type operator()(
-    vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const
+    vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, cuda::std::nullopt_t) const
   {
     auto output_payload = static_cast<output_payload_t>(1);
     if (src_val < dst_val) {
@@ -82,7 +82,7 @@ struct e_op_t {
           src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload));
       }
     } else {
-      return thrust::nullopt;
+      return cuda::std::nullopt;
     }
   }
 };
diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu
index a8393d84e43..0c625da0a6d 100644
--- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu
+++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,11 +39,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/equal.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -65,7 +65,7 @@ struct e_op_t {
   static_assert(std::is_same_v<output_payload_t, int32_t> ||
                 std::is_same_v<output_payload_t, thrust::tuple<float, int32_t>>);
 
-  using return_type = thrust::optional<typename std::conditional_t<
+  using return_type = cuda::std::optional<typename std::conditional_t<
     std::is_same_v<key_t, vertex_t>,
     std::conditional_t<std::is_arithmetic_v<output_payload_t>,
                        thrust::tuple<vertex_t, vertex_t, int32_t>,
@@ -78,7 +78,7 @@ struct e_op_t {
                                     vertex_t dst,
                                     property_t src_val,
                                     property_t dst_val,
-                                    thrust::nullopt_t) const
+                                    cuda::std::nullopt_t) const
   {
     auto output_payload = static_cast<output_payload_t>(1);
     if (src_val < dst_val) {
@@ -109,7 +109,7 @@ struct e_op_t {
         }
       }
     } else {
-      return thrust::nullopt;
+      return cuda::std::nullopt;
     }
   }
 };
diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu
index 386fce24a87..30a53cd15a4 100644
--- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu
+++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,9 +41,9 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/adjacent_difference.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <gtest/gtest.h>
@@ -53,7 +53,7 @@
 template <typename vertex_t, typename bias_t>
 struct e_bias_op_t {
   __device__ bias_t
-  operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const
+  operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const
   {
     return bias;
   }
@@ -65,8 +65,11 @@ struct e_op_t {
                                                       cugraph::to_thrust_tuple(property_t{}),
                                                       cugraph::to_thrust_tuple(property_t{})));
 
-  __device__ result_t operator()(
-    vertex_t src, vertex_t dst, property_t src_prop, property_t dst_prop, thrust::nullopt_t) const
+  __device__ result_t operator()(vertex_t src,
+                                 vertex_t dst,
+                                 property_t src_prop,
+                                 property_t dst_prop,
+                                 cuda::std::nullopt_t) const
   {
     if constexpr (cugraph::is_thrust_tuple_of_arithmetic<property_t>::value) {
       static_assert(thrust::tuple_size<property_t>::value == size_t{2});
@@ -401,18 +404,19 @@ class Tests_MGPerVRandomSelectTransformOutgoingE
           thrust::make_counting_iterator(size_t{0}),
           thrust::make_counting_iterator(mg_aggregate_frontier_vertices.size()),
           [frontier_vertex_first = mg_aggregate_frontier_vertices.begin(),
-           sample_offsets = mg_aggregate_sample_offsets ? thrust::make_optional<size_t const*>(
+           sample_offsets = mg_aggregate_sample_offsets ? cuda::std::make_optional<size_t const*>(
                                                             (*mg_aggregate_sample_offsets).data())
-                                                        : thrust::nullopt,
+                                                        : cuda::std::nullopt,
            sample_e_op_result_first =
              cugraph::get_dataframe_buffer_begin(mg_aggregate_sample_e_op_results),
            sg_offsets = sg_offsets.begin(),
            sg_indices = sg_indices.begin(),
-           sg_biases  = sg_biases ? thrust::make_optional((*sg_biases).begin()) : thrust::nullopt,
-           K          = prims_usecase.K,
+           sg_biases =
+             sg_biases ? cuda::std::make_optional((*sg_biases).begin()) : cuda::std::nullopt,
+           K                = prims_usecase.K,
            with_replacement = prims_usecase.with_replacement,
-           invalid_value =
-             invalid_value ? thrust::make_optional<result_t>(*invalid_value) : thrust::nullopt,
+           invalid_value    = invalid_value ? cuda::std::make_optional<result_t>(*invalid_value)
+                                            : cuda::std::nullopt,
            property_transform =
              cugraph::test::detail::vertex_property_transform<vertex_t, property_t>{
                hash_bin_count}] __device__(size_t i) {
@@ -461,8 +465,8 @@ class Tests_MGPerVRandomSelectTransformOutgoingE
               auto sg_nbr_first = sg_indices + *(sg_offsets + sg_src);
               auto sg_nbr_last  = sg_indices + *(sg_offsets + (sg_src + vertex_t{1}));
               auto sg_nbr_bias_first =
-                sg_biases ? thrust::make_optional((*sg_biases) + *(sg_offsets + sg_src))
-                          : thrust::nullopt;
+                sg_biases ? cuda::std::make_optional((*sg_biases) + *(sg_offsets + sg_src))
+                          : cuda::std::nullopt;
               if (sg_src != v) { return true; }
 
               if (sg_nbr_bias_first) {
diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu
index 3dd256544b4..040e0a6d716 100644
--- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu
+++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,12 +40,12 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/equal.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -218,7 +218,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
               cugraph::edge_dummy_property_t{}.view(),
               mg_dst_key.view(),
               mg_kv_store.view(),
-              key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+              key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
               property_initial_value,
               cugraph::reduce_op::plus<result_t>{},
               cugraph::get_dataframe_buffer_begin(mg_results[i]));
@@ -245,7 +245,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
               cugraph::edge_dummy_property_t{}.view(),
               mg_dst_key.view(),
               mg_kv_store.view(),
-              key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+              key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
               property_initial_value,
               cugraph::reduce_op::elementwise_minimum<result_t>{},
               cugraph::get_dataframe_buffer_begin(mg_results[i]));
@@ -272,7 +272,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
               cugraph::edge_dummy_property_t{}.view(),
               mg_dst_key.view(),
               mg_kv_store.view(),
-              key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+              key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
               property_initial_value,
               cugraph::reduce_op::elementwise_maximum<result_t>{},
               cugraph::get_dataframe_buffer_begin(mg_results[i]));
@@ -414,7 +414,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
                   cugraph::edge_dummy_property_t{}.view(),
                   sg_dst_key.view(),
                   sg_kv_store.view(),
-                  key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+                  key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
                   property_initial_value,
                   cugraph::reduce_op::plus<result_t>{},
                   cugraph::get_dataframe_buffer_begin(global_result));
@@ -441,7 +441,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
                   cugraph::edge_dummy_property_t{}.view(),
                   sg_dst_key.view(),
                   sg_kv_store.view(),
-                  key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+                  key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
                   property_initial_value,
                   cugraph::reduce_op::elementwise_minimum<result_t>{},
                   cugraph::get_dataframe_buffer_begin(global_result));
@@ -468,7 +468,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE
                   cugraph::edge_dummy_property_t{}.view(),
                   sg_dst_key.view(),
                   sg_kv_store.view(),
-                  key_aggregated_e_op_t<vertex_t, thrust::nullopt_t, result_t>{},
+                  key_aggregated_e_op_t<vertex_t, cuda::std::nullopt_t, result_t>{},
                   property_initial_value,
                   cugraph::reduce_op::elementwise_maximum<result_t>{},
                   cugraph::get_dataframe_buffer_begin(global_result));
diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
index 41830b3017c..57d77f6c4bd 100644
--- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
+++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,12 +40,12 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/equal.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -62,7 +62,7 @@ struct e_op_t {
                                  vertex_t dst,
                                  result_t src_property,
                                  result_t dst_property,
-                                 thrust::nullopt_t) const
+                                 cuda::std::nullopt_t) const
   {
     if (src_property < dst_property) {
       return src_property;
diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu
index 3984c7cd86b..c94637cc657 100644
--- a/cpp/tests/prims/mg_transform_e.cu
+++ b/cpp/tests/prims/mg_transform_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,10 +37,10 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <cuco/hash_functions.cuh>
@@ -177,7 +177,8 @@ class Tests_MGTransformE
         mg_src_prop.view(),
         mg_dst_prop.view(),
         cugraph::edge_dummy_property_t{}.view(),
-        [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+        [] __device__(
+          auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
           if (src_property < dst_property) {
             return src_property;
           } else {
@@ -192,7 +193,8 @@ class Tests_MGTransformE
         mg_src_prop.view(),
         mg_dst_prop.view(),
         cugraph::edge_dummy_property_t{}.view(),
-        [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+        [] __device__(
+          auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
           if (src_property < dst_property) {
             return src_property;
           } else {
diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu
index e290f05e9e4..b5dcfaa7aa7 100644
--- a/cpp/tests/prims/mg_transform_reduce_e.cu
+++ b/cpp/tests/prims/mg_transform_reduce_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,11 +38,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -138,7 +138,8 @@ class Tests_MGTransformReduceE
       mg_src_prop.view(),
       mg_dst_prop.view(),
       cugraph::edge_dummy_property_t{}.view(),
-      [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+      [] __device__(
+        auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
         if (src_property < dst_property) {
           return src_property;
         } else {
@@ -189,7 +190,7 @@ class Tests_MGTransformReduceE
           sg_dst_prop.view(),
           cugraph::edge_dummy_property_t{}.view(),
           [] __device__(
-            auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+            auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
             if (src_property < dst_property) {
               return src_property;
             } else {
diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu
index b050e314a15..830b48acade 100644
--- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu
+++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,11 +39,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
@@ -149,7 +149,8 @@ class Tests_MGTransformReduceEBySrcDstKey
       mg_dst_prop.view(),
       cugraph::edge_dummy_property_t{}.view(),
       mg_src_key.view(),
-      [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+      [] __device__(
+        auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
         if (src_property < dst_property) {
           return src_property;
         } else {
@@ -179,7 +180,8 @@ class Tests_MGTransformReduceEBySrcDstKey
       mg_dst_prop.view(),
       cugraph::edge_dummy_property_t{}.view(),
       mg_dst_key.view(),
-      [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+      [] __device__(
+        auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
         if (src_property < dst_property) {
           return src_property;
         } else {
@@ -279,7 +281,7 @@ class Tests_MGTransformReduceEBySrcDstKey
           cugraph::edge_dummy_property_t{}.view(),
           sg_src_key.view(),
           [] __device__(
-            auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+            auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
             if (src_property < dst_property) {
               return src_property;
             } else {
@@ -301,7 +303,7 @@ class Tests_MGTransformReduceEBySrcDstKey
           cugraph::edge_dummy_property_t{}.view(),
           sg_dst_key.view(),
           [] __device__(
-            auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) {
+            auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) {
             if (src_property < dst_property) {
               return src_property;
             } else {
diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu
index 085077017b3..acc89491e56 100644
--- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu
+++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,11 +38,11 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/equal.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/tabulate.h>
@@ -62,25 +62,25 @@ struct e_op_t {
                              vertex_t dst,
                              property_t src_val,
                              property_t dst_val,
-                             thrust::nullopt_t) const
+                             cuda::std::nullopt_t) const
   {
     if constexpr (std::is_same_v<key_t, vertex_t>) {
       if constexpr (std::is_same_v<payload_t, void>) {
-        return src_val < dst_val ? thrust::optional<std::byte>{std::byte{0}} /* dummy */
-                                 : thrust::nullopt;
+        return src_val < dst_val ? cuda::std::optional<std::byte>{std::byte{0}} /* dummy */
+                                 : cuda::std::nullopt;
       } else {
-        return src_val < dst_val ? thrust::optional<payload_t>{static_cast<payload_t>(1)}
-                                 : thrust::nullopt;
+        return src_val < dst_val ? cuda::std::optional<payload_t>{static_cast<payload_t>(1)}
+                                 : cuda::std::nullopt;
       }
     } else {
       auto tag = thrust::get<1>(optionally_tagged_src);
       if constexpr (std::is_same_v<payload_t, void>) {
-        return src_val < dst_val ? thrust::optional<decltype(tag)>{tag} : thrust::nullopt;
+        return src_val < dst_val ? cuda::std::optional<decltype(tag)>{tag} : cuda::std::nullopt;
       } else {
         return src_val < dst_val
-                 ? thrust::optional<thrust::tuple<decltype(tag), payload_t>>{thrust::make_tuple(
+                 ? cuda::std::optional<thrust::tuple<decltype(tag), payload_t>>{thrust::make_tuple(
                      tag, static_cast<payload_t>(1))}
-                 : thrust::nullopt;
+                 : cuda::std::nullopt;
       }
     }
   }
diff --git a/cpp/tests/prims/result_compare.cuh b/cpp/tests/prims/result_compare.cuh
index 5a1abb90e3c..7ee87d402cd 100644
--- a/cpp/tests/prims/result_compare.cuh
+++ b/cpp/tests/prims/result_compare.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,8 +19,8 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/std/optional>
 #include <thrust/equal.h>
-#include <thrust/optional.h>
 #include <thrust/tuple.h>
 
 #include <algorithm>
@@ -36,7 +36,7 @@ namespace detail {
 template <typename T>
 __host__ __device__ bool compare_arithmetic_scalar(T val0,
                                                    T val1,
-                                                   thrust::optional<T> threshold_ratio)
+                                                   cuda::std::optional<T> threshold_ratio)
 {
   if (threshold_ratio) {
     return std::abs(val0 - val1) <= (std::max(std::abs(val0), std::abs(val1)) * *threshold_ratio);
@@ -58,15 +58,16 @@ struct comparator {
       return detail::compare_arithmetic_scalar(
         t0,
         t1,
-        std::is_floating_point_v<T> ? thrust::optional<T>{threshold_ratio} : thrust::nullopt);
+        std::is_floating_point_v<T> ? cuda::std::optional<T>{threshold_ratio} : cuda::std::nullopt);
     } else {
-      auto val0   = thrust::get<0>(t0);
-      auto val1   = thrust::get<0>(t1);
-      auto passed = detail::compare_arithmetic_scalar(
-        val0,
-        val1,
-        std::is_floating_point_v<decltype(val0)> ? thrust::optional<decltype(val0)>{threshold_ratio}
-                                                 : thrust::nullopt);
+      auto val0 = thrust::get<0>(t0);
+      auto val1 = thrust::get<0>(t1);
+      auto passed =
+        detail::compare_arithmetic_scalar(val0,
+                                          val1,
+                                          std::is_floating_point_v<decltype(val0)>
+                                            ? cuda::std::optional<decltype(val0)>{threshold_ratio}
+                                            : cuda::std::nullopt);
       if (!passed) return false;
 
       if constexpr (thrust::tuple_size<T>::value >= 2) {
@@ -76,8 +77,8 @@ struct comparator {
           detail::compare_arithmetic_scalar(val0,
                                             val1,
                                             std::is_floating_point_v<decltype(val1)>
-                                              ? thrust::optional<decltype(val1)>{threshold_ratio}
-                                              : thrust::nullopt);
+                                              ? cuda::std::optional<decltype(val1)>{threshold_ratio}
+                                              : cuda::std::nullopt);
         if (!passed) return false;
       }
       if constexpr (thrust::tuple_size<T>::value >= 3) {
diff --git a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu
index a0babc3b921..ac0523bbce7 100644
--- a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu
+++ b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
 #include <rmm/device_uvector.hpp>
 
 #include <cuda/functional>
+#include <cuda/std/optional>
 #include <thrust/binary_search.h>
 #include <thrust/distance.h>
 #include <thrust/equal.h>
@@ -383,26 +384,27 @@ bool compare_heterogeneous_edgelist(
       this_label_org_sorted_indices.begin(),
       this_label_org_sorted_indices.end(),
       [edge_types = org_edgelist_edge_types
-                      ? thrust::make_optional<raft::device_span<edge_type_t const>>(
+                      ? cuda::std::make_optional<raft::device_span<edge_type_t const>>(
                           (*org_edgelist_edge_types).data() + label_start_offset,
                           label_end_offset - label_start_offset)
-                      : thrust::nullopt,
-       hops       = org_edgelist_hops ? thrust::make_optional<raft::device_span<int32_t const>>(
+                      : cuda::std::nullopt,
+       hops       = org_edgelist_hops ? cuda::std::make_optional<raft::device_span<int32_t const>>(
                                     (*org_edgelist_hops).data() + label_start_offset,
                                     label_end_offset - label_start_offset)
-                                      : thrust::nullopt,
+                                      : cuda::std::nullopt,
        srcs       = raft::device_span<vertex_t const>(org_edgelist_srcs.data() + label_start_offset,
                                                 label_end_offset - label_start_offset),
        dsts       = raft::device_span<vertex_t const>(org_edgelist_dsts.data() + label_start_offset,
                                                 label_end_offset - label_start_offset),
-       weights    = org_edgelist_weights ? thrust::make_optional<raft::device_span<weight_t const>>(
+       weights = org_edgelist_weights ? cuda::std::make_optional<raft::device_span<weight_t const>>(
                                           (*org_edgelist_weights).data() + label_start_offset,
                                           label_end_offset - label_start_offset)
-                                         : thrust::nullopt,
-       edge_ids = org_edgelist_edge_ids ? thrust::make_optional<raft::device_span<edge_id_t const>>(
-                                            (*org_edgelist_edge_ids).data() + label_start_offset,
-                                            label_end_offset - label_start_offset)
-                                        : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) {
+                                      : cuda::std::nullopt,
+       edge_ids = org_edgelist_edge_ids
+                    ? cuda::std::make_optional<raft::device_span<edge_id_t const>>(
+                        (*org_edgelist_edge_ids).data() + label_start_offset,
+                        label_end_offset - label_start_offset)
+                    : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) {
         edge_type_t l_edge_type{0};
         edge_type_t r_edge_type{0};
         if (edge_types) {
@@ -673,15 +675,15 @@ bool compare_heterogeneous_edgelist(
              raft::device_span<vertex_t const>(this_edge_type_unrenumbered_edgelist_dsts.data(),
                                                this_edge_type_unrenumbered_edgelist_dsts.size()),
            weights  = renumbered_edgelist_weights
-                        ? thrust::make_optional<raft::device_span<weight_t const>>(
+                        ? cuda::std::make_optional<raft::device_span<weight_t const>>(
                            (*renumbered_edgelist_weights).data() + edge_type_start_offset,
                            edge_type_end_offset - edge_type_start_offset)
-                        : thrust::nullopt,
+                        : cuda::std::nullopt,
            edge_ids = renumbered_edgelist_edge_ids
-                        ? thrust::make_optional<raft::device_span<edge_id_t const>>(
+                        ? cuda::std::make_optional<raft::device_span<edge_id_t const>>(
                             (*renumbered_edgelist_edge_ids).data() + edge_type_start_offset,
                             edge_type_end_offset - edge_type_start_offset)
-                        : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) {
+                        : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) {
             vertex_t l_src = srcs[l_idx];
             vertex_t r_src = srcs[r_idx];
 
@@ -721,15 +723,15 @@ bool compare_heterogeneous_edgelist(
                raft::device_span<vertex_t const>(org_edgelist_dsts.data() + label_start_offset,
                                                  label_end_offset - label_start_offset),
              org_weights  = org_edgelist_weights
-                              ? thrust::make_optional<raft::device_span<weight_t const>>(
+                              ? cuda::std::make_optional<raft::device_span<weight_t const>>(
                                  (*org_edgelist_weights).data() + label_start_offset,
                                  label_end_offset - label_start_offset)
-                              : thrust::nullopt,
+                              : cuda::std::nullopt,
              org_edge_ids = org_edgelist_edge_ids
-                              ? thrust::make_optional<raft::device_span<edge_id_t const>>(
+                              ? cuda::std::make_optional<raft::device_span<edge_id_t const>>(
                                   (*org_edgelist_edge_ids).data() + label_start_offset,
                                   label_end_offset - label_start_offset)
-                              : thrust::nullopt,
+                              : cuda::std::nullopt,
              unrenumbered_srcs =
                raft::device_span<vertex_t const>(this_edge_type_unrenumbered_edgelist_srcs.data(),
                                                  this_edge_type_unrenumbered_edgelist_srcs.size()),
@@ -738,16 +740,16 @@ bool compare_heterogeneous_edgelist(
                                                  this_edge_type_unrenumbered_edgelist_dsts.size()),
              unrenumbered_weights =
                renumbered_edgelist_weights
-                 ? thrust::make_optional<raft::device_span<weight_t const>>(
+                 ? cuda::std::make_optional<raft::device_span<weight_t const>>(
                      (*renumbered_edgelist_weights).data() + edge_type_start_offset,
                      edge_type_end_offset - edge_type_start_offset)
-                 : thrust::nullopt,
+                 : cuda::std::nullopt,
              unrenumbered_edge_ids =
                unrenumbered_edgelist_edge_ids
-                 ? thrust::make_optional<raft::device_span<edge_id_t const>>(
+                 ? cuda::std::make_optional<raft::device_span<edge_id_t const>>(
                      (*unrenumbered_edgelist_edge_ids).data(),
                      (*unrenumbered_edgelist_edge_ids).size())
-                 : thrust::
+                 : cuda::std::
                      nullopt] __device__(size_t org_idx /* from label_start_offset */,
                                          size_t
                                            unrenumbered_idx /* from edge_type_start_offset */) {
diff --git a/cpp/tests/utilities/property_generator_kernels.cuh b/cpp/tests/utilities/property_generator_kernels.cuh
index 78b22e0dac2..5c4bc00cdfa 100644
--- a/cpp/tests/utilities/property_generator_kernels.cuh
+++ b/cpp/tests/utilities/property_generator_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
 #include "prims/update_edge_src_dst_property.cuh"
 #include "utilities/property_generator_utilities.hpp"
 
-#include <thrust/optional.h>
+#include <cuda/std/optional>
 #include <thrust/tuple.h>
 
 #include <cuco/hash_functions.cuh>
@@ -69,8 +69,11 @@ template <typename vertex_t, typename property_t>
 struct edge_property_transform {
   int32_t mod{};
 
-  constexpr __device__ property_t operator()(
-    vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const
+  constexpr __device__ property_t operator()(vertex_t src,
+                                             vertex_t dst,
+                                             cuda::std::nullopt_t,
+                                             cuda::std::nullopt_t,
+                                             cuda::std::nullopt_t) const
   {
     static_assert(cugraph::is_thrust_tuple_of_arithmetic<property_t>::value ||
                   std::is_arithmetic_v<property_t>);

From 9e3a45722042d07083846de1fb39cbd348c75e90 Mon Sep 17 00:00:00 2001
From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com>
Date: Wed, 29 Jan 2025 14:34:23 -0800
Subject: [PATCH 4/9] [BUG] Output Edge Labels in the Distributed Sampler
 (#4898)

We currently do not output edge labels in the distributed sampler, which breaks some link prediction workflows where the graph contains pre-labeled edges.  This PR adds support for that so these workflows can be enabled.

Authors:
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: https://github.com/rapidsai/cugraph/pull/4898
---
 .../cugraph/gnn/data_loading/dist_sampler.py  | 37 ++++++++++++++++---
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py
index 2edafe95716..6ac4ca142ee 100644
--- a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py
+++ b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py
@@ -223,6 +223,7 @@ def __get_call_groups(
         input_id: TensorType,
         seeds_per_call: int,
         assume_equal_input_size: bool = False,
+        label: Optional[TensorType] = None,
     ):
         torch = import_optional("torch")
 
@@ -231,6 +232,8 @@ def __get_call_groups(
         # many batches.
         seeds_call_groups = torch.split(seeds, seeds_per_call, dim=-1)
         index_call_groups = torch.split(input_id, seeds_per_call, dim=-1)
+        if label is not None:
+            label_call_groups = torch.split(label, seeds_per_call, dim=-1)
 
         # Need to add empties to the list of call groups to handle the case
         # where not all ranks have the same number of call groups.  This
@@ -251,8 +254,16 @@ def __get_call_groups(
                 [torch.tensor([], dtype=torch.int64, device=input_id.device)]
                 * (int(num_call_groups) - len(index_call_groups))
             )
+            if label is not None:
+                label_call_groups = list(label_call_groups) + (
+                    [torch.tensor([], dtype=label.dtype, device=label.device)]
+                    * (int(num_call_groups) - len(label_call_groups))
+                )
 
-        return seeds_call_groups, index_call_groups
+        if label is not None:
+            return seeds_call_groups, index_call_groups, label_call_groups
+        else:
+            return seeds_call_groups, index_call_groups
 
     def sample_from_nodes(
         self,
@@ -344,7 +355,7 @@ def sample_from_nodes(
     def __sample_from_edges_func(
         self,
         call_id: int,
-        current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor"],
+        current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"],
         batch_id_start: int,
         batch_size: int,
         batches_per_call: int,
@@ -353,7 +364,7 @@ def __sample_from_edges_func(
     ) -> Union[None, Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]]:
         torch = import_optional("torch")
 
-        current_seeds, current_ix = current_seeds_and_ix
+        current_seeds, current_ix, current_label = current_seeds_and_ix
         num_seed_edges = current_ix.numel()
 
         # The index gets stored as-is regardless of what makes it into
@@ -468,6 +479,7 @@ def __sample_from_edges_func(
             random_state=random_state,
         )
         minibatch_dict["input_index"] = current_ix.cuda()
+        minibatch_dict["input_label"] = current_label.cuda()
         minibatch_dict["input_offsets"] = input_offsets
         minibatch_dict[
             "edge_inverse"
@@ -505,6 +517,7 @@ def sample_from_edges(
         random_state: int = 62,
         assume_equal_input_size: bool = False,
         input_id: Optional[TensorType] = None,
+        input_label: Optional[TensorType] = None,
     ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]:
         """
         Performs sampling starting from seed edges.
@@ -527,6 +540,10 @@ def sample_from_edges(
             Input ids corresponding to the original batch tensor, if it
             was permuted prior to calling this function.  If present,
             will be saved with the samples.
+        input_label: Optional[TensorType]
+            Input labels corresponding to the input seeds.  Typically used
+            for link prediction sampling.  If present, will be saved with
+            the samples.  Generally not compatible with negative sampling.
         """
 
         torch = import_optional("torch")
@@ -545,12 +562,20 @@ def sample_from_edges(
             local_num_batches, assume_equal_input_size=assume_equal_input_size
         )
 
-        edges_call_groups, index_call_groups = self.__get_call_groups(
+        groups = self.__get_call_groups(
             edges,
             input_id,
             actual_seed_edges_per_call,
             assume_equal_input_size=input_size_is_equal,
+            label=input_label,
         )
+        if len(groups) == 2:
+            edges_call_groups, index_call_groups = groups
+            label_call_groups = [torch.tensor([], dtype=torch.int32)] * len(
+                edges_call_groups
+            )
+        else:
+            edges_call_groups, index_call_groups, label_call_groups = groups
 
         sample_args = [
             batch_id_start,
@@ -563,14 +588,14 @@ def sample_from_edges(
         if self.__writer is None:
             # Buffered sampling
             return BufferedSampleReader(
-                zip(edges_call_groups, index_call_groups),
+                zip(edges_call_groups, index_call_groups, label_call_groups),
                 self.__sample_from_edges_func,
                 *sample_args,
             )
         else:
             # Unbuffered sampling
             for i, current_seeds_and_ix in enumerate(
-                zip(edges_call_groups, index_call_groups)
+                zip(edges_call_groups, index_call_groups, label_call_groups)
             ):
                 sample_args[0] = self.__sample_from_edges_func(
                     i,

From 5a41b41549e2afa5efbdf5b19f7601013a2450f0 Mon Sep 17 00:00:00 2001
From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com>
Date: Thu, 30 Jan 2025 00:48:08 -0500
Subject: [PATCH 5/9] Update MG negative sampling to return random samples
 distributed as specified (#4885)

Modifies the new negative sampling interface so that, when called in MG mode, each rank specifies how many samples it wishes to receive, and the generated samples are randomly distributed across the calling GPUs.

Marked breaking as it changes the C++ interface... although nothing uses it yet.

Closes #4672

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)

URL: https://github.com/rapidsai/cugraph/pull/4885
---
 cpp/include/cugraph/sampling_functions.hpp  |   5 +-
 cpp/src/detail/permute_range.cuh            |   4 +-
 cpp/src/detail/permute_range_v32.cu         |  19 +-
 cpp/src/detail/permute_range_v64.cu         |  19 +-
 cpp/src/sampling/negative_sampling_impl.cuh | 240 ++++++++++++++++----
 cpp/tests/sampling/mg_negative_sampling.cpp |  34 +--
 6 files changed, 210 insertions(+), 111 deletions(-)

diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp
index 35c51c1ea6d..0ccf49ddfb6 100644
--- a/cpp/include/cugraph/sampling_functions.hpp
+++ b/cpp/include/cugraph/sampling_functions.hpp
@@ -1152,7 +1152,10 @@ lookup_endpoints_from_edge_ids_and_types(
  * @param dst_biases Optional bias for randomly selecting destination vertices.  If std::nullopt
  * vertices will be selected uniformly.  In multi-GPU environment the biases should be partitioned
  * based on the vertex partitions.
- * @param num_samples Number of negative samples to generate
+ * @param num_samples Number of negative samples to generate.  In SG mode this represents the total
+ * number of samples to generate.  In MG mode, each GPU will provide the number of samples desired
+ * on that GPU.  The total number of samples in MG mode will be the sum of these values, and the
+ * resulting samples will be randomly distributed across the ranks.
  * @param remove_duplicates If true, remove duplicate samples
  * @param remove_existing_edges If true, remove samples that are actually edges in the graph
  * @param exact_number_of_samples If true, repeat generation until we get the exact number of
diff --git a/cpp/src/detail/permute_range.cuh b/cpp/src/detail/permute_range.cuh
index c7cd57c2048..a9d1b27f52d 100644
--- a/cpp/src/detail/permute_range.cuh
+++ b/cpp/src/detail/permute_range.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -58,7 +58,7 @@ rmm::device_uvector<vertex_t> permute_range(raft::handle_t const& handle,
       sub_range_sizes.begin(), sub_range_sizes.end(), sub_range_sizes.begin(), global_start);
     CUGRAPH_EXPECTS(
       sub_range_sizes[comm_rank] == local_range_start,
-      "Invalid input arguments: a rage must have contiguous and non-overlapping values");
+      "Invalid input arguments: a range must have contiguous and non-overlapping values");
   }
   rmm::device_uvector<vertex_t> permuted_integers(local_range_size, handle.get_stream());
 
diff --git a/cpp/src/detail/permute_range_v32.cu b/cpp/src/detail/permute_range_v32.cu
index 6a7bc059901..91d23487f03 100644
--- a/cpp/src/detail/permute_range_v32.cu
+++ b/cpp/src/detail/permute_range_v32.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,23 +16,6 @@
 
 #include "detail/permute_range.cuh"
 
-#include <cugraph/detail/collect_comm_wrapper.hpp>
-#include <cugraph/detail/shuffle_wrappers.hpp>
-#include <cugraph/detail/utility_wrappers.hpp>
-#include <cugraph/utilities/host_scalar_comm.hpp>
-#include <cugraph/utilities/shuffle_comm.cuh>
-
-#include <raft/core/device_span.hpp>
-#include <raft/core/handle.hpp>
-#include <raft/random/rng_state.hpp>
-
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/iterator/constant_iterator.h>
-#include <thrust/reduce.h>
-#include <thrust/sort.h>
-
 namespace cugraph {
 
 namespace detail {
diff --git a/cpp/src/detail/permute_range_v64.cu b/cpp/src/detail/permute_range_v64.cu
index ad7daf16419..a6dbc9a72ae 100644
--- a/cpp/src/detail/permute_range_v64.cu
+++ b/cpp/src/detail/permute_range_v64.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,23 +16,6 @@
 
 #include "detail/permute_range.cuh"
 
-#include <cugraph/detail/collect_comm_wrapper.hpp>
-#include <cugraph/detail/shuffle_wrappers.hpp>
-#include <cugraph/detail/utility_wrappers.hpp>
-#include <cugraph/utilities/host_scalar_comm.hpp>
-#include <cugraph/utilities/shuffle_comm.cuh>
-
-#include <raft/core/device_span.hpp>
-#include <raft/core/handle.hpp>
-#include <raft/random/rng_state.hpp>
-
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/iterator/constant_iterator.h>
-#include <thrust/reduce.h>
-#include <thrust/sort.h>
-
 namespace cugraph {
 
 namespace detail {
diff --git a/cpp/src/sampling/negative_sampling_impl.cuh b/cpp/src/sampling/negative_sampling_impl.cuh
index 541eda67860..9aedc5dfc35 100644
--- a/cpp/src/sampling/negative_sampling_impl.cuh
+++ b/cpp/src/sampling/negative_sampling_impl.cuh
@@ -16,8 +16,11 @@
 
 #pragma once
 
+#include "cugraph/detail/collect_comm_wrapper.hpp"
+#include "cugraph/utilities/device_comm.hpp"
 #include "prims/reduce_v.cuh"
 #include "prims/update_edge_src_dst_property.cuh"
+#include "thrust/iterator/zip_iterator.h"
 #include "utilities/collect_comm.cuh"
 
 #include <cugraph/detail/shuffle_wrappers.hpp>
@@ -26,6 +29,10 @@
 #include <cugraph/utilities/device_functors.cuh>
 #include <cugraph/utilities/host_scalar_comm.hpp>
 
+#include <raft/core/device_span.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/util/cudart_utils.hpp>
+
 #include <rmm/device_scalar.hpp>
 
 #include <thrust/adjacent_difference.h>
@@ -37,6 +44,8 @@
 #include <thrust/transform.h>
 #include <thrust/unique.h>
 
+#include <tuple>
+
 namespace cugraph {
 
 namespace detail {
@@ -265,11 +274,19 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
   bool exact_number_of_samples,
   bool do_expensive_check)
 {
-  rmm::device_uvector<vertex_t> src(0, handle.get_stream());
-  rmm::device_uvector<vertex_t> dst(0, handle.get_stream());
+  rmm::device_uvector<vertex_t> srcs(0, handle.get_stream());
+  rmm::device_uvector<vertex_t> dsts(0, handle.get_stream());
 
   // Optimistically assume we can do this in one pass
-  size_t samples_in_this_batch = num_samples;
+  size_t total_samples{num_samples};
+  std::vector<size_t> samples_per_gpu;
+
+  if constexpr (multi_gpu) {
+    samples_per_gpu = host_scalar_allgather(handle.get_comms(), num_samples, handle.get_stream());
+    total_samples   = std::reduce(samples_per_gpu.begin(), samples_per_gpu.end());
+  }
+
+  size_t samples_in_this_batch = total_samples;
 
   // Normalize the biases and (for MG) determine how the biases are
   // distributed across the GPUs.
@@ -298,16 +315,16 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
            : 0);
     }
 
-    auto batch_src = create_local_samples(
+    auto batch_srcs = create_local_samples(
       handle, rng_state, graph_view, normalized_src_biases, gpu_src_biases, samples_in_this_batch);
-    auto batch_dst = create_local_samples(
+    auto batch_dsts = create_local_samples(
       handle, rng_state, graph_view, normalized_dst_biases, gpu_dst_biases, samples_in_this_batch);
 
     if constexpr (multi_gpu) {
       auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts();
 
-      std::tie(batch_src,
-               batch_dst,
+      std::tie(batch_srcs,
+               batch_dsts,
                std::ignore,
                std::ignore,
                std::ignore,
@@ -320,8 +337,8 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
                                                                                        int32_t,
                                                                                        int32_t>(
           handle,
-          std::move(batch_src),
-          std::move(batch_dst),
+          std::move(batch_srcs),
+          std::move(batch_dsts),
           std::nullopt,
           std::nullopt,
           std::nullopt,
@@ -333,42 +350,43 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
     if (remove_existing_edges) {
       auto has_edge_flags =
         graph_view.has_edge(handle,
-                            raft::device_span<vertex_t const>{batch_src.data(), batch_src.size()},
-                            raft::device_span<vertex_t const>{batch_dst.data(), batch_dst.size()},
+                            raft::device_span<vertex_t const>{batch_srcs.data(), batch_srcs.size()},
+                            raft::device_span<vertex_t const>{batch_dsts.data(), batch_dsts.size()},
                             do_expensive_check);
 
-      auto begin_iter = thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin());
+      auto begin_iter = thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin());
       auto new_end    = thrust::remove_if(handle.get_thrust_policy(),
                                        begin_iter,
-                                       begin_iter + batch_src.size(),
+                                       begin_iter + batch_srcs.size(),
                                        has_edge_flags.begin(),
                                        thrust::identity<bool>());
 
-      batch_src.resize(thrust::distance(begin_iter, new_end), handle.get_stream());
-      batch_dst.resize(thrust::distance(begin_iter, new_end), handle.get_stream());
+      batch_srcs.resize(thrust::distance(begin_iter, new_end), handle.get_stream());
+      batch_dsts.resize(thrust::distance(begin_iter, new_end), handle.get_stream());
     }
 
     if (remove_duplicates) {
       thrust::sort(handle.get_thrust_policy(),
-                   thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()),
-                   thrust::make_zip_iterator(batch_src.end(), batch_dst.end()));
+                   thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()),
+                   thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end()));
 
-      auto new_end = thrust::unique(handle.get_thrust_policy(),
-                                    thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()),
-                                    thrust::make_zip_iterator(batch_src.end(), batch_dst.end()));
+      auto new_end =
+        thrust::unique(handle.get_thrust_policy(),
+                       thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()),
+                       thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end()));
 
-      size_t new_size =
-        thrust::distance(thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), new_end);
+      size_t new_size = thrust::distance(
+        thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), new_end);
 
-      if (src.size() > 0) {
-        rmm::device_uvector<vertex_t> new_src(src.size() + new_size, handle.get_stream());
-        rmm::device_uvector<vertex_t> new_dst(dst.size() + new_size, handle.get_stream());
+      if (srcs.size() > 0) {
+        rmm::device_uvector<vertex_t> new_src(srcs.size() + new_size, handle.get_stream());
+        rmm::device_uvector<vertex_t> new_dst(dsts.size() + new_size, handle.get_stream());
 
         thrust::merge(handle.get_thrust_policy(),
-                      thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()),
+                      thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()),
                       new_end,
-                      thrust::make_zip_iterator(src.begin(), dst.begin()),
-                      thrust::make_zip_iterator(src.end(), dst.end()),
+                      thrust::make_zip_iterator(srcs.begin(), dsts.begin()),
+                      thrust::make_zip_iterator(srcs.end(), dsts.end()),
                       thrust::make_zip_iterator(new_src.begin(), new_dst.begin()));
 
         new_end = thrust::unique(handle.get_thrust_policy(),
@@ -378,32 +396,32 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
         new_size =
           thrust::distance(thrust::make_zip_iterator(new_src.begin(), new_dst.begin()), new_end);
 
-        src = std::move(new_src);
-        dst = std::move(new_dst);
+        srcs = std::move(new_src);
+        dsts = std::move(new_dst);
       } else {
-        src = std::move(batch_src);
-        dst = std::move(batch_dst);
+        srcs = std::move(batch_srcs);
+        dsts = std::move(batch_dsts);
       }
 
-      src.resize(new_size, handle.get_stream());
-      dst.resize(new_size, handle.get_stream());
-    } else if (src.size() > 0) {
-      size_t current_end = src.size();
+      srcs.resize(new_size, handle.get_stream());
+      dsts.resize(new_size, handle.get_stream());
+    } else if (srcs.size() > 0) {
+      size_t current_end = srcs.size();
 
-      src.resize(src.size() + batch_src.size(), handle.get_stream());
-      dst.resize(dst.size() + batch_dst.size(), handle.get_stream());
+      srcs.resize(srcs.size() + batch_srcs.size(), handle.get_stream());
+      dsts.resize(dsts.size() + batch_dsts.size(), handle.get_stream());
 
       thrust::copy(handle.get_thrust_policy(),
-                   thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()),
-                   thrust::make_zip_iterator(batch_src.end(), batch_dst.end()),
-                   thrust::make_zip_iterator(src.begin(), dst.begin()) + current_end);
+                   thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()),
+                   thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end()),
+                   thrust::make_zip_iterator(srcs.begin(), dsts.begin()) + current_end);
     } else {
-      src = std::move(batch_src);
-      dst = std::move(batch_dst);
+      srcs = std::move(batch_srcs);
+      dsts = std::move(batch_dsts);
     }
 
     if (exact_number_of_samples) {
-      size_t current_sample_size = src.size();
+      size_t current_sample_size = srcs.size();
       if constexpr (multi_gpu) {
         current_sample_size = cugraph::host_scalar_allreduce(
           handle.get_comms(), current_sample_size, raft::comms::op_t::SUM, handle.get_stream());
@@ -412,16 +430,142 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> negativ
       // FIXME: We could oversample and discard the unnecessary samples
       // to reduce the number of iterations in the outer loop, but it seems like
       // exact_number_of_samples is an edge case not worth optimizing for at this time.
-      samples_in_this_batch = num_samples - current_sample_size;
+      samples_in_this_batch = total_samples - current_sample_size;
     } else {
       samples_in_this_batch = 0;
     }
   }
 
-  src.shrink_to_fit(handle.get_stream());
-  dst.shrink_to_fit(handle.get_stream());
+  srcs.shrink_to_fit(handle.get_stream());
+  dsts.shrink_to_fit(handle.get_stream());
+
+  if constexpr (multi_gpu) {
+    auto const& comm     = handle.get_comms();
+    auto const comm_size = comm.get_size();
+    auto const comm_rank = comm.get_rank();
+
+    // Randomly shuffle the samples so that each gpu gets their
+    // desired number of samples
+
+    if (!exact_number_of_samples) {
+      // If we didn't force generating the exact number of samples,
+      // we might have fewer samples than requested.  We need to
+      // accommodate this situation.  For now we'll just
+      // uniformly(-ish) reduce the requested size.
+      size_t total_extracted = host_scalar_allreduce(
+        handle.get_comms(), srcs.size(), raft::comms::op_t::SUM, handle.get_stream());
+      size_t reduction = total_samples - total_extracted;
+
+      while (reduction > 0) {
+        size_t est_reduction_per_gpu = (reduction + comm_size - 1) / comm_size;
+        for (size_t i = 0; i < samples_per_gpu.size(); ++i) {
+          if (samples_per_gpu[i] > est_reduction_per_gpu) {
+            samples_per_gpu[i] -= est_reduction_per_gpu;
+            reduction -= est_reduction_per_gpu;
+          } else {
+            reduction -= samples_per_gpu[i];
+            samples_per_gpu[i] = 0;
+          }
+
+          if (reduction < est_reduction_per_gpu) est_reduction_per_gpu = reduction;
+        }
+      }
+      num_samples = samples_per_gpu[comm_rank];
+    }
+
+    // Mimic the logic of permute_range...
+    //
+    //  1) Randomly assign each entry to a GPU
+    //  2) Count how many are assigned to each GPU
+    //  3) Allgatherv (allgather?) to give each GPU a count of the entries destined for that GPU
+    //  4) Identify extras/deficits for each GPU, arbitrarily adjust counts to make correct
+    //  5) Shuffle accordingly
+    //
+    rmm::device_uvector<int> gpu_assignment(srcs.size(), handle.get_stream());
+
+    cugraph::detail::uniform_random_fill(handle.get_stream(),
+                                         gpu_assignment.data(),
+                                         gpu_assignment.size(),
+                                         int{0},
+                                         int{comm_size},
+                                         rng_state);
+
+    thrust::sort_by_key(handle.get_thrust_policy(),
+                        gpu_assignment.begin(),
+                        gpu_assignment.end(),
+                        thrust::make_zip_iterator(srcs.begin(), dsts.begin()));
+
+    rmm::device_uvector<size_t> d_send_counts(comm_size, handle.get_stream());
+    thrust::tabulate(
+      handle.get_thrust_policy(),
+      d_send_counts.begin(),
+      d_send_counts.end(),
+      [gpu_assignment_span = raft::device_span<const int>{
+         gpu_assignment.data(), gpu_assignment.size()}] __device__(size_t i) {
+        auto begin = thrust::lower_bound(
+          thrust::seq, gpu_assignment_span.begin(), gpu_assignment_span.end(), static_cast<int>(i));
+        auto end =
+          thrust::upper_bound(thrust::seq, begin, gpu_assignment_span.end(), static_cast<int>(i));
+        return thrust::distance(begin, end);
+      });
+
+    std::vector<size_t> tx_value_counts(comm_size, 0);
+    raft::update_host(
+      tx_value_counts.data(), d_send_counts.data(), d_send_counts.size(), handle.get_stream());
+
+    std::forward_as_tuple(std::tie(srcs, dsts), std::ignore) =
+      cugraph::shuffle_values(handle.get_comms(),
+                              thrust::make_zip_iterator(srcs.begin(), dsts.begin()),
+                              tx_value_counts,
+                              handle.get_stream());
+
+    rmm::device_uvector<float> fractional_random_numbers(srcs.size(), handle.get_stream());
+
+    cugraph::detail::uniform_random_fill(handle.get_stream(),
+                                         fractional_random_numbers.data(),
+                                         fractional_random_numbers.size(),
+                                         float{0.0},
+                                         float{1.0},
+                                         rng_state);
+    thrust::sort_by_key(handle.get_thrust_policy(),
+                        fractional_random_numbers.begin(),
+                        fractional_random_numbers.end(),
+                        thrust::make_zip_iterator(srcs.begin(), dsts.begin()));
+
+    size_t nr_extras{0};
+    size_t nr_deficits{0};
+    if (srcs.size() > num_samples) {
+      nr_extras = srcs.size() - static_cast<size_t>(num_samples);
+    } else {
+      nr_deficits = static_cast<size_t>(num_samples) - srcs.size();
+    }
+
+    auto extra_srcs = cugraph::detail::device_allgatherv(
+      handle, comm, raft::device_span<vertex_t const>(srcs.data() + num_samples, nr_extras));
+    // nr_extras > 0 ? nr_extras : 0));
+    auto extra_dsts = cugraph::detail::device_allgatherv(
+      handle, comm, raft::device_span<vertex_t const>(dsts.data() + num_samples, nr_extras));
+    // nr_extras > 0 ? nr_extras : 0));
+
+    srcs.resize(num_samples, handle.get_stream());
+    dsts.resize(num_samples, handle.get_stream());
+    auto deficits =
+      cugraph::host_scalar_allgather(handle.get_comms(), nr_deficits, handle.get_stream());
+
+    std::exclusive_scan(deficits.begin(), deficits.end(), deficits.begin(), vertex_t{0});
+
+    raft::copy(srcs.data() + num_samples - nr_deficits,
+               extra_srcs.begin() + deficits[comm_rank],
+               nr_deficits,
+               handle.get_stream());
+
+    raft::copy(dsts.data() + num_samples - nr_deficits,
+               extra_dsts.begin() + deficits[comm_rank],
+               nr_deficits,
+               handle.get_stream());
+  }
 
-  return std::make_tuple(std::move(src), std::move(dst));
+  return std::make_tuple(std::move(srcs), std::move(dsts));
 }
 
 }  // namespace cugraph
diff --git a/cpp/tests/sampling/mg_negative_sampling.cpp b/cpp/tests/sampling/mg_negative_sampling.cpp
index 7c64bb7fbbb..eb9f4fbb394 100644
--- a/cpp/tests/sampling/mg_negative_sampling.cpp
+++ b/cpp/tests/sampling/mg_negative_sampling.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "cugraph/utilities/host_scalar_comm.hpp"
 #include "utilities/base_fixture.hpp"
 #include "utilities/conversion_utilities.hpp"
 #include "utilities/property_generator_utilities.hpp"
@@ -85,8 +86,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam<input_usecase_
 
     if (negative_sampling_usecase.edge_masking) { graph_view.attach_edge_mask(edge_mask_->view()); }
 
-    size_t num_samples =
-      graph_view.compute_number_of_edges(*handle_) * negative_sampling_usecase.sample_multiplier;
+    size_t num_samples = graph_view.compute_number_of_edges(*handle_) *
+                         negative_sampling_usecase.sample_multiplier /
+                         handle_->get_comms().get_size();
 
     rmm::device_uvector<weight_t> src_bias_v(0, handle_->get_stream());
     rmm::device_uvector<weight_t> dst_bias_v(0, handle_->get_stream());
@@ -150,26 +152,8 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam<input_usecase_
                           raft::device_span<vertex_t>{src_out.data(), src_out.size()},
                           raft::device_span<vertex_t>{dst_out.data(), dst_out.size()});
 
-      // TODO:  Move this to validation_utilities...
-      auto h_vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts();
-      rmm::device_uvector<vertex_t> d_vertex_partition_range_lasts(
-        h_vertex_partition_range_lasts.size(), handle_->get_stream());
-      raft::update_device(d_vertex_partition_range_lasts.data(),
-                          h_vertex_partition_range_lasts.data(),
-                          h_vertex_partition_range_lasts.size(),
-                          handle_->get_stream());
-
-      size_t error_count = cugraph::test::count_edges_on_wrong_int_gpu(
-        *handle_,
-        raft::device_span<vertex_t const>{src_out.data(), src_out.size()},
-        raft::device_span<vertex_t const>{dst_out.data(), dst_out.size()},
-        raft::device_span<vertex_t const>{d_vertex_partition_range_lasts.data(),
-                                          d_vertex_partition_range_lasts.size()});
-
-      ASSERT_EQ(error_count, 0) << "generate edges out of range > 0";
-
       if ((negative_sampling_usecase.remove_duplicates) && (src_out.size() > 0)) {
-        error_count = cugraph::test::count_duplicate_vertex_pairs_sorted(
+        size_t error_count = cugraph::test::count_duplicate_vertex_pairs_sorted(
           *handle_,
           raft::device_span<vertex_t const>{src_out.data(), src_out.size()},
           raft::device_span<vertex_t const>{dst_out.data(), dst_out.size()});
@@ -184,7 +168,7 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam<input_usecase_
           cugraph::decompress_to_edgelist<vertex_t, edge_t, float, int, false, true>(
             *handle_, graph_view, std::nullopt, std::nullopt, std::nullopt, std::nullopt);
 
-        error_count = cugraph::test::count_intersection<vertex_t, edge_t, weight_t, int32_t>(
+        size_t error_count = cugraph::test::count_intersection<vertex_t, edge_t, weight_t, int32_t>(
           *handle_,
           raft::device_span<vertex_t const>{graph_src.data(), graph_src.size()},
           raft::device_span<vertex_t const>{graph_dst.data(), graph_dst.size()},
@@ -202,7 +186,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam<input_usecase_
       if (negative_sampling_usecase.exact_number_of_samples) {
         size_t sz = cugraph::host_scalar_allreduce(
           handle_->get_comms(), src_out.size(), raft::comms::op_t::SUM, handle_->get_stream());
-        ASSERT_EQ(sz, num_samples) << "Expected exact number of samples";
+        size_t aggregate_sample_count = cugraph::host_scalar_allreduce(
+          handle_->get_comms(), num_samples, raft::comms::op_t::SUM, handle_->get_stream());
+        ASSERT_EQ(sz, aggregate_sample_count) << "Expected exact number of samples";
       }
 
       //  TBD: How do we determine if we have properly reflected the biases?

From ef450140bcd5f52b376dc84cb8c26c5c753d9eb3 Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Thu, 30 Jan 2025 06:55:40 +0100
Subject: [PATCH 6/9] Fix issues when building with upcoming cccl (#4888)

We are removing a lot of deprecated thrust features, so replace them with the equivalent `cuda::std` ones.

Authors:
  - Michael Schellenberger Costa (https://github.com/miscco)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: https://github.com/rapidsai/cugraph/pull/4888
---
 cpp/include/cugraph/utilities/shuffle_comm.cuh | 12 ++++++------
 cpp/src/prims/property_op_utils.cuh            |  6 ++----
 cpp/src/traversal/extract_bfs_paths_impl.cuh   |  5 +++--
 cpp/tests/utilities/check_utilities.hpp        |  2 +-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh
index 98fa2cb1706..d173cc08a1c 100644
--- a/cpp/include/cugraph/utilities/shuffle_comm.cuh
+++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -145,21 +145,21 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm,
 
 template <typename key_type, typename KeyToGroupIdOp>
 struct key_group_id_less_t {
-  KeyToGroupIdOp key_to_group_id_op{};
+  KeyToGroupIdOp key_to_group_id_op;
   int pivot{};
   __device__ bool operator()(key_type k) const { return key_to_group_id_op(k) < pivot; }
 };
 
 template <typename value_type, typename ValueToGroupIdOp>
 struct value_group_id_less_t {
-  ValueToGroupIdOp value_to_group_id_op{};
+  ValueToGroupIdOp value_to_group_id_op;
   int pivot{};
   __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) < pivot; }
 };
 
 template <typename key_type, typename value_type, typename KeyToGroupIdOp>
 struct kv_pair_group_id_less_t {
-  KeyToGroupIdOp key_to_group_id_op{};
+  KeyToGroupIdOp key_to_group_id_op;
   int pivot{};
   __device__ bool operator()(thrust::tuple<key_type, value_type> t) const
   {
@@ -169,14 +169,14 @@ struct kv_pair_group_id_less_t {
 
 template <typename value_type, typename ValueToGroupIdOp>
 struct value_group_id_greater_equal_t {
-  ValueToGroupIdOp value_to_group_id_op{};
+  ValueToGroupIdOp value_to_group_id_op;
   int pivot{};
   __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) >= pivot; }
 };
 
 template <typename key_type, typename value_type, typename KeyToGroupIdOp>
 struct kv_pair_group_id_greater_equal_t {
-  KeyToGroupIdOp key_to_group_id_op{};
+  KeyToGroupIdOp key_to_group_id_op;
   int pivot{};
   __device__ bool operator()(thrust::tuple<key_type, value_type> t) const
   {
diff --git a/cpp/src/prims/property_op_utils.cuh b/cpp/src/prims/property_op_utils.cuh
index 04ad22cbf71..2cab42c1dc8 100644
--- a/cpp/src/prims/property_op_utils.cuh
+++ b/cpp/src/prims/property_op_utils.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -127,9 +127,7 @@ template <typename T, template <typename> typename Op>
 struct property_op : public Op<T> {};
 
 template <typename... Args, template <typename> typename Op>
-struct property_op<thrust::tuple<Args...>, Op>
-  : public thrust::
-      binary_function<thrust::tuple<Args...>, thrust::tuple<Args...>, thrust::tuple<Args...>> {
+struct property_op<thrust::tuple<Args...>, Op> {
   using Type = thrust::tuple<Args...>;
 
  private:
diff --git a/cpp/src/traversal/extract_bfs_paths_impl.cuh b/cpp/src/traversal/extract_bfs_paths_impl.cuh
index d228460bec3..d0a7979d14b 100644
--- a/cpp/src/traversal/extract_bfs_paths_impl.cuh
+++ b/cpp/src/traversal/extract_bfs_paths_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/functional>
 #include <thrust/binary_search.h>
 #include <thrust/count.h>
 #include <thrust/distance.h>
@@ -52,7 +53,7 @@ template <typename vertex_t>
 struct compute_max {
   vertex_t __device__ operator()(vertex_t lhs, vertex_t rhs)
   {
-    return thrust::max<vertex_t>(lhs, rhs);
+    return cuda::std::max<vertex_t>(lhs, rhs);
   }
 };
 
diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp
index a22d95c87de..6974d14be04 100644
--- a/cpp/tests/utilities/check_utilities.hpp
+++ b/cpp/tests/utilities/check_utilities.hpp
@@ -97,7 +97,7 @@ struct device_nearly_equal {
   bool __device__ operator()(type_t lhs, type_t rhs) const
   {
     return std::abs(lhs - rhs) <
-           cuda::std::max(thrust::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+           cuda::std::max(cuda::std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
   }
 };
 

From 4baef585cd0a932ade6b307564c6eebf41ffe61d Mon Sep 17 00:00:00 2001
From: Joseph Nke <76006812+jnke2016@users.noreply.github.com>
Date: Thu, 30 Jan 2025 22:13:05 +0100
Subject: [PATCH 7/9] Optimize K-Truss (#4742)

This PR introduces several optimizations to speed up K-Truss. Currently, our K-Truss implementation computes the intersection for all edges regardless of whether they are weak, which can be very expensive if only a few edges need to be invalidated. Running `nbr_intersection` only on the weak edges considerably improves the runtime.

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Ralph Liu (https://github.com/nv-rliu)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: https://github.com/rapidsai/cugraph/pull/4742
---
 cpp/src/community/k_truss_impl.cuh | 810 +++++++++++++++++++++--------
 1 file changed, 585 insertions(+), 225 deletions(-)

diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh
index bd497b9c58c..25a1cf63f4d 100644
--- a/cpp/src/community/k_truss_impl.cuh
+++ b/cpp/src/community/k_truss_impl.cuh
@@ -19,6 +19,7 @@
 #include "prims/extract_transform_e.cuh"
 #include "prims/extract_transform_v_frontier_outgoing_e.cuh"
 #include "prims/fill_edge_property.cuh"
+#include "prims/per_v_pair_dst_nbr_intersection.cuh"
 #include "prims/transform_e.cuh"
 #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh"
 #include "prims/update_edge_src_dst_property.cuh"
@@ -32,6 +33,7 @@
 #include <raft/util/integer_utils.hpp>
 
 #include <cuda/std/optional>
+#include <cuda/std/utility>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/distance.h>
@@ -43,6 +45,59 @@
 
 namespace cugraph {
 
+template <typename vertex_t, typename edge_t>
+struct extract_weak_edges {
+  edge_t k{};  // threshold parameter: an edge is emitted when its count is below k - 2
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+    vertex_t src, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_t count) const
+  {
+    // Emit (src, dst) when count < k - 2; edges with count == 0 are skipped (no need to process)
+    return ((count < k - 2) && (count != 0))
+             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
+             : cuda::std::nullopt;
+  }
+};
+
+template <typename edge_t>
+struct is_k_or_greater_t {
+  edge_t k{};  // inclusive lower bound: operator() is true when core_number >= k
+  __device__ bool operator()(edge_t core_number) const { return core_number >= edge_t{k}; }
+};
+
+// Maps a flat position i of intersection_indices to the triangle formed by the weak edge that
+// owns this position and the intersected vertex; the three vertex IDs are returned in
+// non-decreasing order.
+template <typename vertex_t, typename edge_t>
+struct extract_triangles_endpoints {
+  size_t chunk_start{};
+  raft::device_span<size_t const> intersection_offsets{};
+  raft::device_span<vertex_t const> intersection_indices{};
+  raft::device_span<vertex_t const> weak_srcs{};
+  raft::device_span<vertex_t const> weak_dsts{};
+
+  __device__ thrust::tuple<vertex_t, vertex_t, vertex_t> operator()(edge_t i) const
+  {
+    // idx = number of offsets past the first that are <= i (assumes sorted offsets, where
+    // this is the position of the weak edge whose intersection contains entry i).
+    auto itr = thrust::upper_bound(
+      thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i);
+    auto idx = thrust::distance(intersection_offsets.begin() + 1, itr);
+
+    auto p = weak_srcs[chunk_start + idx];
+    auto q = weak_dsts[chunk_start + idx];
+    auto r = intersection_indices[i];
+    // Re-order the endpoints such that p < q < r in order to identify duplicate triangles
+    // which will cause overcompensation. comparing the vertex IDs is cheaper than comparing the
+    // degrees (d(p) < d(q) < d(r)) which will be done once in the latter stage to retrieve the
+    // direction of the edges once the triplet dependency is broken.
+    if (p > q) cuda::std::swap(p, q);
+    if (p > r) cuda::std::swap(p, r);
+    if (q > r) cuda::std::swap(q, r);
+
+    return thrust::make_tuple(p, q, r);
+  }
+};
+
 namespace {
 
 template <typename vertex_t>
@@ -60,39 +115,48 @@ struct exclude_self_loop_t {
   }
 };
 
-template <typename vertex_t, typename weight_t, typename edge_t>
-struct extract_low_to_high_degree_weighted_edges_t {
-  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t, weight_t>> operator()(
-    vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const
-  {
-    return (src_out_degree < dst_out_degree)
-             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t, weight_t>>{thrust::make_tuple(
-                 src, dst, wgt)}
-             : (((src_out_degree == dst_out_degree) &&
-                 (src < dst) /* tie-breaking using vertex ID */)
-                  ? cuda::std::optional<
-                      thrust::tuple<vertex_t, vertex_t, weight_t>>{thrust::make_tuple(
-                      src, dst, wgt)}
-                  : cuda::std::nullopt);
-  }
-};
-
 template <typename vertex_t, typename edge_t>
-struct extract_low_to_high_degree_edges_t {
-  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t>> operator()(
+struct extract_low_to_high_degree_edges_from_endpoints_t {
+  raft::device_span<vertex_t const> srcs{};  // with dsts: edge list binary-searched by (src, dst)
+  raft::device_span<vertex_t const> dsts{};
+  raft::device_span<edge_t const> count{};  // count[i] is returned for edge (srcs[i], dsts[i])
+  __device__ cuda::std::optional<thrust::tuple<vertex_t, vertex_t, edge_t>> operator()(
     vertex_t src,
     vertex_t dst,
     edge_t src_out_degree,
     edge_t dst_out_degree,
     cuda::std::nullopt_t) const
   {
-    return (src_out_degree < dst_out_degree)
-             ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src, dst)}
-             : (((src_out_degree == dst_out_degree) &&
-                 (src < dst) /* tie-breaking using vertex ID */)
-                  ? cuda::std::optional<thrust::tuple<vertex_t, vertex_t>>{thrust::make_tuple(src,
-                                                                                              dst)}
-                  : cuda::std::nullopt);
+    // FIXME: Not the most efficient way because the entire edgelist is scanned just to find
+    // the direction of the edges
+    auto itr = thrust::lower_bound(thrust::seq,
+                                   thrust::make_zip_iterator(srcs.begin(), dsts.begin()),
+                                   thrust::make_zip_iterator(srcs.end(), dsts.end()),
+                                   thrust::make_tuple(src, dst));
+
+    if ((itr != thrust::make_zip_iterator(srcs.end(), dsts.end())) &&
+        (*itr == thrust::make_tuple(src, dst))) {
+      auto idx = thrust::distance(thrust::make_zip_iterator(srcs.begin(), dsts.begin()), itr);
+
+      if (src_out_degree < dst_out_degree) {
+        return cuda::std::optional<thrust::tuple<vertex_t, vertex_t, edge_t>>{
+          thrust::make_tuple(src, dst, count[idx])};
+      } else if (dst_out_degree < src_out_degree) {
+        return cuda::std::optional<thrust::tuple<vertex_t, vertex_t, edge_t>>{
+          thrust::make_tuple(dst, src, count[idx])};
+      } else {
+        if ((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) {
+          return cuda::std::optional<thrust::tuple<vertex_t, vertex_t, edge_t>>{
+            thrust::make_tuple(src, dst, count[idx])};
+        } else if ((src_out_degree == dst_out_degree) &&
+                   (src > dst) /* tie-breaking using vertex ID */) {
+          return cuda::std::optional<thrust::tuple<vertex_t, vertex_t, edge_t>>{
+            thrust::make_tuple(dst, src, count[idx])};
+        }
+      }
+    }
+    // Edge not found in (srcs, dsts), or equal degrees with src == dst: nothing to extract.
+    return cuda::std::nullopt;
   }
 };
 
@@ -119,246 +183,543 @@ k_truss(raft::handle_t const& handle,
     // nothing to do
   }
 
-  std::optional<graph_t<vertex_t, edge_t, false, multi_gpu>> modified_graph{std::nullopt};
-  std::optional<graph_view_t<vertex_t, edge_t, false, multi_gpu>> modified_graph_view{std::nullopt};
-  std::optional<rmm::device_uvector<vertex_t>> renumber_map{std::nullopt};
-  std::optional<edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, weight_t>>
-    edge_weight{std::nullopt};
-  std::optional<rmm::device_uvector<weight_t>> wgts{std::nullopt};
-
-  if (graph_view.count_self_loops(handle) > edge_t{0}) {
-    auto [srcs, dsts] = extract_transform_e(handle,
-                                            graph_view,
-                                            edge_src_dummy_property_t{}.view(),
-                                            edge_dst_dummy_property_t{}.view(),
-                                            edge_dummy_property_t{}.view(),
-                                            exclude_self_loop_t<vertex_t>{});
-
-    if constexpr (multi_gpu) {
-      std::tie(
-        srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) =
-        detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
-                                                                                       edge_t,
-                                                                                       weight_t,
-                                                                                       int32_t,
-                                                                                       int32_t>(
-          handle,
-          std::move(srcs),
-          std::move(dsts),
-          std::nullopt,
-          std::nullopt,
-          std::nullopt,
-          std::nullopt,
-          std::nullopt);
-    }
+  // 2. Exclude self-loops and edges that do not belong to (k-1)-core
 
-    std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) =
-      create_graph_from_edgelist<vertex_t, edge_t, weight_t, int32_t, false, multi_gpu>(
-        handle,
-        std::nullopt,
-        std::move(srcs),
-        std::move(dsts),
-        std::nullopt,
-        std::nullopt,
-        std::nullopt,
-        cugraph::graph_properties_t{true, graph_view.is_multigraph()},
-        true);
+  auto cur_graph_view          = graph_view;
+  auto unmasked_cur_graph_view = cur_graph_view;
 
-    modified_graph_view = (*modified_graph).view();
-  }
+  if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); }
+  // mask for self-loops and edges not part of k-1 core
+  cugraph::edge_property_t<decltype(cur_graph_view), bool> undirected_mask(handle);
+  {
+    // 2.1 Exclude self-loops
 
-  // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core
+    if (cur_graph_view.count_self_loops(handle) > edge_t{0}) {
+      // 2.1. Exclude self-loops
 
-  {
-    auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view;
-
-    auto vertex_partition_range_lasts =
-      renumber_map
-        ? std::make_optional<std::vector<vertex_t>>(cur_graph_view.vertex_partition_range_lasts())
-        : std::nullopt;
-
-    rmm::device_uvector<edge_t> core_numbers(cur_graph_view.number_of_vertices(),
-                                             handle.get_stream());
-    core_number(
-      handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2});
-
-    raft::device_span<edge_t const> core_number_span{core_numbers.data(), core_numbers.size()};
-
-    auto [srcs, dsts, wgts] = k_core(handle,
-                                     cur_graph_view,
-                                     edge_weight_view,
-                                     k - 1,
-                                     std::make_optional(k_core_degree_type_t::OUT),
-                                     std::make_optional(core_number_span));
-
-    if constexpr (multi_gpu) {
-      std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) =
-        detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
-                                                                                       edge_t,
-                                                                                       weight_t,
-                                                                                       int32_t,
-                                                                                       int32_t>(
-          handle,
-          std::move(srcs),
-          std::move(dsts),
-          std::move(wgts),
-          std::nullopt,
-          std::nullopt,
-          std::nullopt,
-          std::nullopt);
-    }
+      cugraph::edge_property_t<decltype(cur_graph_view), bool> self_loop_edge_mask(handle,
+                                                                                   cur_graph_view);
+      cugraph::fill_edge_property(
+        handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false);
 
-    std::optional<rmm::device_uvector<vertex_t>> tmp_renumber_map{std::nullopt};
-    std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) =
-      create_graph_from_edgelist<vertex_t, edge_t, weight_t, int32_t, false, multi_gpu>(
+      transform_e(
         handle,
-        std::nullopt,
-        std::move(srcs),
-        std::move(dsts),
-        std::move(wgts),
-        std::nullopt,
-        std::nullopt,
-        cugraph::graph_properties_t{true, graph_view.is_multigraph()},
-        true);
+        cur_graph_view,
+        edge_src_dummy_property_t{}.view(),
+        edge_dst_dummy_property_t{}.view(),
+        edge_dummy_property_t{}.view(),
+        [] __device__(auto src, auto dst, auto, auto, auto) { return src != dst; },
+        self_loop_edge_mask.mutable_view());
+
+      undirected_mask = std::move(self_loop_edge_mask);
+      if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); }
+      cur_graph_view.attach_edge_mask(undirected_mask.view());
+    }
 
-    modified_graph_view = (*modified_graph).view();
+    // 2.2 Find (k-1)-core and exclude edges that do not belong to (k-1)-core
+    {
+      rmm::device_uvector<edge_t> core_numbers(cur_graph_view.number_of_vertices(),
+                                               handle.get_stream());
+      core_number(handle,
+                  cur_graph_view,
+                  core_numbers.data(),
+                  k_core_degree_type_t::OUT,
+                  size_t{2},
+                  size_t{2});
+
+      edge_src_property_t<decltype(cur_graph_view), bool> edge_src_in_k_minus_1_cores(
+        handle, cur_graph_view);
+      edge_dst_property_t<decltype(cur_graph_view), bool> edge_dst_in_k_minus_1_cores(
+        handle, cur_graph_view);
+      auto in_k_minus_1_core_first =
+        thrust::make_transform_iterator(core_numbers.begin(), is_k_or_greater_t<edge_t>{k - 1});
+      rmm::device_uvector<bool> in_k_minus_1_core_flags(core_numbers.size(), handle.get_stream());
+      thrust::copy(handle.get_thrust_policy(),
+                   in_k_minus_1_core_first,
+                   in_k_minus_1_core_first + core_numbers.size(),
+                   in_k_minus_1_core_flags.begin());
+      update_edge_src_property(handle,
+                               cur_graph_view,
+                               in_k_minus_1_core_flags.begin(),
+                               edge_src_in_k_minus_1_cores.mutable_view());
+      update_edge_dst_property(handle,
+                               cur_graph_view,
+                               in_k_minus_1_core_flags.begin(),
+                               edge_dst_in_k_minus_1_cores.mutable_view());
+
+      cugraph::edge_property_t<decltype(cur_graph_view), bool> in_k_minus_1_core_edge_mask(
+        handle, cur_graph_view);
+      cugraph::fill_edge_property(
+        handle, unmasked_cur_graph_view, in_k_minus_1_core_edge_mask.mutable_view(), false);
+
+      transform_e(
+        handle,
+        cur_graph_view,
+        edge_src_in_k_minus_1_cores.view(),
+        edge_dst_in_k_minus_1_cores.view(),
+        edge_dummy_property_t{}.view(),
+        [] __device__(auto, auto, auto src_in_k_minus_1_core, auto dst_in_k_minus_1_core, auto) {
+          return src_in_k_minus_1_core && dst_in_k_minus_1_core;
+        },
+        in_k_minus_1_core_edge_mask.mutable_view());
 
-    if (renumber_map) {  // collapse renumber_map
-      unrenumber_int_vertices<vertex_t, multi_gpu>(handle,
-                                                   (*tmp_renumber_map).data(),
-                                                   (*tmp_renumber_map).size(),
-                                                   (*renumber_map).data(),
-                                                   *vertex_partition_range_lasts);
+      undirected_mask = std::move(in_k_minus_1_core_edge_mask);
+      if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); }
+      cur_graph_view.attach_edge_mask(undirected_mask.view());
     }
-
-    renumber_map = std::move(tmp_renumber_map);
   }
 
   // 3. Keep only the edges from a low-degree vertex to a high-degree vertex.
 
-  {
-    auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view;
-
-    auto vertex_partition_range_lasts =
-      renumber_map
-        ? std::make_optional<std::vector<vertex_t>>(cur_graph_view.vertex_partition_range_lasts())
-        : std::nullopt;
+  edge_src_property_t<decltype(cur_graph_view), edge_t> edge_src_out_degrees(handle,
+                                                                             cur_graph_view);
+  edge_dst_property_t<decltype(cur_graph_view), edge_t> edge_dst_out_degrees(handle,
+                                                                             cur_graph_view);
 
+  cugraph::edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, bool> dodg_mask(
+    handle, cur_graph_view);
+  {
     auto out_degrees = cur_graph_view.compute_out_degrees(handle);
-    edge_src_property_t<decltype(cur_graph_view), edge_t> edge_src_out_degrees(handle,
-                                                                               cur_graph_view);
-    edge_dst_property_t<decltype(cur_graph_view), edge_t> edge_dst_out_degrees(handle,
-                                                                               cur_graph_view);
     update_edge_src_property(
       handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view());
     update_edge_dst_property(
       handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view());
 
-    rmm::device_uvector<vertex_t> srcs(0, handle.get_stream());
-    rmm::device_uvector<vertex_t> dsts(0, handle.get_stream());
-
-    edge_weight_view =
-      edge_weight ? std::make_optional((*edge_weight).view())
-                  : std::optional<edge_property_view_t<edge_t, weight_t const*>>{std::nullopt};
-    if (edge_weight_view) {
-      std::tie(srcs, dsts, wgts) = extract_transform_e(
-        handle,
-        cur_graph_view,
-        edge_src_out_degrees.view(),
-        edge_dst_out_degrees.view(),
-        *edge_weight_view,
-        extract_low_to_high_degree_weighted_edges_t<vertex_t, weight_t, edge_t>{});
-    } else {
-      std::tie(srcs, dsts) =
-        extract_transform_e(handle,
-                            cur_graph_view,
-                            edge_src_out_degrees.view(),
-                            edge_dst_out_degrees.view(),
-                            edge_dummy_property_t{}.view(),
-                            extract_low_to_high_degree_edges_t<vertex_t, edge_t>{});
-    }
-
-    if constexpr (multi_gpu) {
-      std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) =
-        detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
-                                                                                       edge_t,
-                                                                                       weight_t,
-                                                                                       int32_t,
-                                                                                       int32_t>(
-          handle,
-          std::move(srcs),
-          std::move(dsts),
-          std::move(wgts),
-          std::nullopt,
-          std::nullopt,
-          std::nullopt,
-          std::nullopt);
-    }
-
-    std::optional<rmm::device_uvector<vertex_t>> tmp_renumber_map{std::nullopt};
-
-    std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) =
-      create_graph_from_edgelist<vertex_t, edge_t, weight_t, int32_t, false, multi_gpu>(
-        handle,
-        std::nullopt,
-        std::move(srcs),
-        std::move(dsts),
-        std::move(wgts),
-        std::nullopt,
-        std::nullopt,
-        cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()},
-        true);
-
-    modified_graph_view = (*modified_graph).view();
-    if (renumber_map) {  // collapse renumber_map
-      unrenumber_int_vertices<vertex_t, multi_gpu>(handle,
-                                                   (*tmp_renumber_map).data(),
-                                                   (*tmp_renumber_map).size(),
-                                                   (*renumber_map).data(),
-                                                   *vertex_partition_range_lasts);
-    }
-    renumber_map = std::move(tmp_renumber_map);
+    cugraph::fill_edge_property(
+      handle, unmasked_cur_graph_view, dodg_mask.mutable_view(), bool{false});
+
+    cugraph::transform_e(
+      handle,
+      cur_graph_view,
+      edge_src_out_degrees.view(),
+      edge_dst_out_degrees.view(),
+      edge_dummy_property_t{}.view(),
+      [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, auto) {
+        return (src_out_degree < dst_out_degree) ? true
+               : ((src_out_degree == dst_out_degree) &&
+                  (src < dst) /* tie-breaking using vertex ID */)
+                 ? true
+                 : false;
+      },
+      dodg_mask.mutable_view(),
+      do_expensive_check);
+
+    if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); }
+    cur_graph_view.attach_edge_mask(dodg_mask.view());
   }
 
   // 4. Compute triangle count using nbr_intersection and unroll weak edges
 
   {
-    auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view;
+    // Mask self loops and edges not being part of k-1 core
+    auto weak_edges_mask = std::move(undirected_mask);
 
-    edge_weight_view =
-      edge_weight ? std::make_optional((*edge_weight).view())
-                  : std::optional<edge_property_view_t<edge_t, weight_t const*>>{std::nullopt};
+    auto edge_triangle_counts =
+      edge_triangle_count<vertex_t, edge_t, multi_gpu>(handle, cur_graph_view, false);
 
-    cugraph::edge_property_t<decltype(cur_graph_view), bool> edge_mask(handle, cur_graph_view);
-    cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true});
+    cugraph::edge_bucket_t<vertex_t, void, true, multi_gpu, true> edgelist_weak(handle);
+    cugraph::edge_bucket_t<vertex_t, void, true, multi_gpu, true> edges_to_decrement_count(handle);
+    size_t prev_chunk_size = 0;  // FIXME: Add support for chunking
 
     while (true) {
-      // FIXME: This approach is very expensive when invalidating only few edges per iteration
-      // and should be address.
-      auto edge_triangle_counts =
-        edge_triangle_count<vertex_t, edge_t, multi_gpu>(handle, cur_graph_view);
+      // Extract weak edges
+      auto [weak_edgelist_srcs, weak_edgelist_dsts] =
+        extract_transform_e(handle,
+                            cur_graph_view,
+                            edge_src_dummy_property_t{}.view(),
+                            edge_dst_dummy_property_t{}.view(),
+                            edge_triangle_counts.view(),
+                            extract_weak_edges<vertex_t, edge_t>{k});
+
+      auto weak_edgelist_first =
+        thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin());
+      auto weak_edgelist_last =
+        thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end());
+
+      thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_last);
+
+      // Perform nbr_intersection of the weak edges from the undirected
+      // graph view
+      cur_graph_view.clear_edge_mask();
+
+      // Attach the weak edge mask
+      cur_graph_view.attach_edge_mask(weak_edges_mask.view());
+
+      auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection(
+        handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, do_expensive_check);
+
+      // This array stores (p, q, r) which are endpoints for the triangles with weak edges
+
+      auto triangles_endpoints =
+        allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t, vertex_t>>(
+          intersection_indices.size(), handle.get_stream());
+
+      // Extract endpoints for triangles with weak edges
+      thrust::tabulate(
+        handle.get_thrust_policy(),
+        get_dataframe_buffer_begin(triangles_endpoints),
+        get_dataframe_buffer_end(triangles_endpoints),
+        extract_triangles_endpoints<vertex_t, edge_t>{
+          prev_chunk_size,
+          raft::device_span<size_t const>(intersection_offsets.data(), intersection_offsets.size()),
+          raft::device_span<vertex_t const>(intersection_indices.data(),
+                                            intersection_indices.size()),
+          raft::device_span<vertex_t const>(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()),
+          raft::device_span<vertex_t const>(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())});
+
+      thrust::sort(handle.get_thrust_policy(),
+                   get_dataframe_buffer_begin(triangles_endpoints),
+                   get_dataframe_buffer_end(triangles_endpoints));
+
+      auto unique_triangle_end = thrust::unique(handle.get_thrust_policy(),
+                                                get_dataframe_buffer_begin(triangles_endpoints),
+                                                get_dataframe_buffer_end(triangles_endpoints));
+
+      auto num_unique_triangles = thrust::distance(  // Triangles are represented by their endpoints
+        get_dataframe_buffer_begin(triangles_endpoints),
+        unique_triangle_end);
+
+      resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream());
+
+      if constexpr (multi_gpu) {
+        auto& comm           = handle.get_comms();
+        auto const comm_size = comm.get_size();
+        auto& major_comm     = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
+        auto const major_comm_size = major_comm.get_size();
+        auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
+        auto const minor_comm_size = minor_comm.get_size();
+
+        auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts();
+
+        rmm::device_uvector<vertex_t> d_vertex_partition_range_lasts(
+          vertex_partition_range_lasts.size(), handle.get_stream());
+
+        raft::update_device(d_vertex_partition_range_lasts.data(),
+                            vertex_partition_range_lasts.data(),
+                            vertex_partition_range_lasts.size(),
+                            handle.get_stream());
+
+        // Shuffle the edges with respect to the undirected graph view to the GPU
+        // owning edge (p, q). Remember that the triplet (p, q, r) is ordered based on the
+        // vertex ID and not the degree so (p, q) might not be an edge in the DODG but is
+        // surely an edge in the undirected graph
+        std::tie(triangles_endpoints, std::ignore) = groupby_gpu_id_and_shuffle_values(
+          handle.get_comms(),
+          get_dataframe_buffer_begin(triangles_endpoints),
+          get_dataframe_buffer_end(triangles_endpoints),
+
+          [key_func =
+             cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t<vertex_t>{
+               raft::device_span<vertex_t const>(d_vertex_partition_range_lasts.data(),
+                                                 d_vertex_partition_range_lasts.size()),
+               comm_size,
+               major_comm_size,
+               minor_comm_size}] __device__(auto val) {
+            return key_func(thrust::get<0>(val), thrust::get<1>(val));
+          },
+          handle.get_stream());
+
+        thrust::sort(handle.get_thrust_policy(),
+                     get_dataframe_buffer_begin(triangles_endpoints),
+                     get_dataframe_buffer_end(triangles_endpoints));
+
+        unique_triangle_end = thrust::unique(handle.get_thrust_policy(),
+                                             get_dataframe_buffer_begin(triangles_endpoints),
+                                             get_dataframe_buffer_end(triangles_endpoints));
+
+        num_unique_triangles =
+          thrust::distance(get_dataframe_buffer_begin(triangles_endpoints), unique_triangle_end);
+        resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream());
+      }
+
+      auto edgelist_to_update_count = allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(
+        3 * num_unique_triangles, handle.get_stream());
+
+      // The order no longer matters since duplicated triangles have been removed
+      // Flatten the endpoints to a list of edges.
+      thrust::transform(
+        handle.get_thrust_policy(),
+        thrust::make_counting_iterator<edge_t>(0),
+        thrust::make_counting_iterator<edge_t>(size_dataframe_buffer(edgelist_to_update_count)),
+        get_dataframe_buffer_begin(edgelist_to_update_count),
+        [num_unique_triangles,
+         triangles_endpoints =
+           get_dataframe_buffer_begin(triangles_endpoints)] __device__(auto idx) {
+          auto idx_triangle           = idx % num_unique_triangles;
+          auto idx_vertex_in_triangle = idx / num_unique_triangles;
+          auto triangle               = (triangles_endpoints + idx_triangle).get_iterator_tuple();
+          vertex_t src;
+          vertex_t dst;
+
+          if (idx_vertex_in_triangle == 0) {
+            src = *(thrust::get<0>(triangle));
+            dst = *(thrust::get<1>(triangle));
+          }
+
+          if (idx_vertex_in_triangle == 1) {
+            src = *(thrust::get<0>(triangle));
+            dst = *(thrust::get<2>(triangle));
+          }
+
+          if (idx_vertex_in_triangle == 2) {
+            src = *(thrust::get<1>(triangle));
+            dst = *(thrust::get<2>(triangle));
+          }
+
+          return thrust::make_tuple(src, dst);
+        });
+
+      if constexpr (multi_gpu) {
+        std::tie(std::get<0>(edgelist_to_update_count),
+                 std::get<1>(edgelist_to_update_count),
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore) =
+          detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
+                                                                                         edge_t,
+                                                                                         weight_t,
+                                                                                         int32_t,
+                                                                                         int32_t>(
+            handle,
+            std::move(std::get<0>(edgelist_to_update_count)),
+            std::move(std::get<1>(edgelist_to_update_count)),
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            cur_graph_view.vertex_partition_range_lasts());
+      }
+
+      thrust::sort(handle.get_thrust_policy(),
+                   get_dataframe_buffer_begin(edgelist_to_update_count),
+                   get_dataframe_buffer_end(edgelist_to_update_count));
+
+      auto unique_pair_count =
+        thrust::unique_count(handle.get_thrust_policy(),
+                             get_dataframe_buffer_begin(edgelist_to_update_count),
+                             get_dataframe_buffer_end(edgelist_to_update_count));
+
+      auto vertex_pair_buffer_unique = allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(
+        unique_pair_count, handle.get_stream());
+
+      rmm::device_uvector<edge_t> decrease_count(unique_pair_count, handle.get_stream());
+
+      thrust::reduce_by_key(handle.get_thrust_policy(),
+                            get_dataframe_buffer_begin(edgelist_to_update_count),
+                            get_dataframe_buffer_end(edgelist_to_update_count),
+                            thrust::make_constant_iterator(size_t{1}),
+                            get_dataframe_buffer_begin(vertex_pair_buffer_unique),
+                            decrease_count.begin(),
+                            thrust::equal_to<thrust::tuple<vertex_t, vertex_t>>{});
+
+      std::tie(std::get<0>(vertex_pair_buffer_unique),
+               std::get<1>(vertex_pair_buffer_unique),
+               decrease_count) =
+        extract_transform_e(
+          handle,
+          cur_graph_view,
+          edge_src_out_degrees.view(),
+          edge_dst_out_degrees.view(),
+          edge_dummy_property_t{}.view(),
+          extract_low_to_high_degree_edges_from_endpoints_t<vertex_t, edge_t>{
+            raft::device_span<vertex_t const>(std::get<0>(vertex_pair_buffer_unique).data(),
+                                              std::get<0>(vertex_pair_buffer_unique).size()),
+            raft::device_span<vertex_t const>(std::get<1>(vertex_pair_buffer_unique).data(),
+                                              std::get<1>(vertex_pair_buffer_unique).size()),
+            raft::device_span<edge_t const>(decrease_count.data(), decrease_count.size())});
+
+      if constexpr (multi_gpu) {
+        auto& comm           = handle.get_comms();
+        auto const comm_size = comm.get_size();
+        auto& major_comm     = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
+        auto const major_comm_size = major_comm.get_size();
+        auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
+        auto const minor_comm_size        = minor_comm.get_size();
+        auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts();
+
+        rmm::device_uvector<vertex_t> d_vertex_partition_range_lasts(
+          vertex_partition_range_lasts.size(), handle.get_stream());
+        raft::update_device(d_vertex_partition_range_lasts.data(),
+                            vertex_partition_range_lasts.data(),
+                            vertex_partition_range_lasts.size(),
+                            handle.get_stream());
+
+        std::forward_as_tuple(std::tie(std::get<0>(vertex_pair_buffer_unique),
+                                       std::get<1>(vertex_pair_buffer_unique),
+                                       decrease_count),
+                              std::ignore) =
+          groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(),
+                                      std::get<1>(vertex_pair_buffer_unique).begin(),
+                                      decrease_count.begin()),
+            thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(),
+                                      std::get<1>(vertex_pair_buffer_unique).end(),
+                                      decrease_count.end()),
+            [key_func =
+               cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t<vertex_t>{
+                 raft::device_span<vertex_t const>(d_vertex_partition_range_lasts.data(),
+                                                   d_vertex_partition_range_lasts.size()),
+                 comm_size,
+                 major_comm_size,
+                 minor_comm_size}] __device__(auto val) {
+              return key_func(thrust::get<0>(val), thrust::get<1>(val));
+            },
+            handle.get_stream());
+      }
+
+      thrust::sort_by_key(handle.get_thrust_policy(),
+                          get_dataframe_buffer_begin(vertex_pair_buffer_unique),
+                          get_dataframe_buffer_end(vertex_pair_buffer_unique),
+                          decrease_count.begin());
+
+      // Update count of weak edges
+      edges_to_decrement_count.clear();
+
+      edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(),
+                                      std::get<0>(vertex_pair_buffer_unique).end(),
+                                      std::get<1>(vertex_pair_buffer_unique).begin());
+
+      cur_graph_view.clear_edge_mask();
+      // Check for edge existence on the directed graph view
+      cur_graph_view.attach_edge_mask(dodg_mask.view());
+
+      // Update count of weak edges from the DODG view
+      cugraph::transform_e(
+        handle,
+        cur_graph_view,
+        edges_to_decrement_count,
+        cugraph::edge_src_dummy_property_t{}.view(),
+        cugraph::edge_dst_dummy_property_t{}.view(),
+        edge_triangle_counts.view(),
+        [edge_buffer_first =
+           thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(),
+                                     std::get<1>(vertex_pair_buffer_unique).begin()),
+         edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(),
+                                                      std::get<1>(vertex_pair_buffer_unique).end()),
+         decrease_count   = raft::device_span<edge_t>(
+           decrease_count.data(), decrease_count.size())] __device__(auto src,
+                                                                     auto dst,
+                                                                     cuda::std::nullopt_t,
+                                                                     cuda::std::nullopt_t,
+                                                                     edge_t count) {
+          auto itr_pair = thrust::lower_bound(
+            thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst));
+          auto idx_pair = thrust::distance(edge_buffer_first, itr_pair);
+          count -= decrease_count[idx_pair];
+
+          return count;
+        },
+        edge_triangle_counts.mutable_view(),
+        do_expensive_check);
+
+      edgelist_weak.clear();
+
+      thrust::sort(
+        handle.get_thrust_policy(),
+        thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()),
+        thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()));
+
+      edgelist_weak.insert(
+        weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin());
 
-      // Mask all the edges that have k - 2 count
+      // Get undirected graph view
+      cur_graph_view.clear_edge_mask();
+      cur_graph_view.attach_edge_mask(weak_edges_mask.view());
 
       auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle);
 
       cugraph::transform_e(
         handle,
         cur_graph_view,
+        edgelist_weak,
         cugraph::edge_src_dummy_property_t{}.view(),
         cugraph::edge_dst_dummy_property_t{}.view(),
-        edge_triangle_counts.view(),
-        [k] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) {
-          return count >= k - 2;
+        cugraph::edge_dummy_property_t{}.view(),
+        [] __device__(
+          auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) {
+          return false;
         },
-        edge_mask.mutable_view(),
-        false);
+        weak_edges_mask.mutable_view(),
+        do_expensive_check);
+
+      edgelist_weak.clear();
+
+      // shuffle the edges if multi_gpu
+      if constexpr (multi_gpu) {
+        std::tie(weak_edgelist_dsts,
+                 weak_edgelist_srcs,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore,
+                 std::ignore) =
+          detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
+                                                                                         edge_t,
+                                                                                         weight_t,
+                                                                                         int32_t,
+                                                                                         int32_t>(
+            handle,
+            std::move(weak_edgelist_dsts),
+            std::move(weak_edgelist_srcs),
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            cur_graph_view.vertex_partition_range_lasts());
+      }
+
+      thrust::sort(
+        handle.get_thrust_policy(),
+        thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()),
+        thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end()));
+
+      edgelist_weak.insert(
+        weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin());
 
-      cur_graph_view.attach_edge_mask(edge_mask.view());
+      cugraph::transform_e(
+        handle,
+        cur_graph_view,
+        edgelist_weak,
+        cugraph::edge_src_dummy_property_t{}.view(),
+        cugraph::edge_dst_dummy_property_t{}.view(),
+        cugraph::edge_dummy_property_t{}.view(),
+        [] __device__(
+          auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) {
+          return false;
+        },
+        weak_edges_mask.mutable_view(),
+        do_expensive_check);
+
+      cur_graph_view.attach_edge_mask(weak_edges_mask.view());
 
       if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; }
+
+      cur_graph_view.clear_edge_mask();
+      cur_graph_view.attach_edge_mask(dodg_mask.view());
     }
 
+    cur_graph_view.clear_edge_mask();
+    cur_graph_view.attach_edge_mask(dodg_mask.view());
+
+    cugraph::transform_e(
+      handle,
+      cur_graph_view,
+      cugraph::edge_src_dummy_property_t{}.view(),
+      cugraph::edge_dst_dummy_property_t{}.view(),
+      edge_triangle_counts.view(),
+      [] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) {
+        return count == 0 ? false : true;
+      },
+      dodg_mask.mutable_view(),
+      do_expensive_check);
+
     rmm::device_uvector<vertex_t> edgelist_srcs(0, handle.get_stream());
     rmm::device_uvector<vertex_t> edgelist_dsts(0, handle.get_stream());
     std::optional<rmm::device_uvector<weight_t>> edgelist_wgts{std::nullopt};
@@ -367,11 +728,10 @@ k_truss(raft::handle_t const& handle,
       decompress_to_edgelist(
         handle,
         cur_graph_view,
-        edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt,
+        edge_weight_view,
         std::optional<edge_property_view_t<edge_t, edge_t const*>>{std::nullopt},
         std::optional<cugraph::edge_property_view_t<edge_t, int32_t const*>>{std::nullopt},
-        std::make_optional(
-          raft::device_span<vertex_t const>((*renumber_map).data(), (*renumber_map).size())));
+        std::optional<raft::device_span<vertex_t const>>{std::nullopt});
 
     std::tie(edgelist_srcs,
              edgelist_dsts,

From 1d542ef3d802e37ffd4b9984a85caedea6ce8384 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 30 Jan 2025 22:20:23 -0600
Subject: [PATCH 8/9] Build and test with CUDA 12.8.0 (#4896)

This PR uses CUDA 12.8.0 to build and test.

xref: https://github.com/rapidsai/build-planning/issues/139

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cugraph/pull/4896
---
 .../devcontainer.json                         |  8 ++---
 .../devcontainer.json                         | 12 +++----
 .github/workflows/build.yaml                  | 20 +++++------
 .github/workflows/pr.yaml                     | 34 +++++++++----------
 .github/workflows/test.yaml                   | 10 +++---
 .../trigger-breaking-change-alert.yaml        |  2 +-
 ..._64.yaml => all_cuda-128_arch-x86_64.yaml} |  4 +--
 dependencies.yaml                             |  8 +++--
 python/cugraph/pyproject.toml                 |  2 +-
 python/libcugraph/pyproject.toml              |  4 +--
 python/pylibcugraph/pyproject.toml            |  2 +-
 11 files changed, 55 insertions(+), 51 deletions(-)
 rename .devcontainer/{cuda12.5-conda => cuda12.8-conda}/devcontainer.json (91%)
 rename .devcontainer/{cuda12.5-pip => cuda12.8-pip}/devcontainer.json (88%)
 rename conda/environments/{all_cuda-125_arch-x86_64.yaml => all_cuda-128_arch-x86_64.yaml} (96%)

diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json
similarity index 91%
rename from .devcontainer/cuda12.5-conda/devcontainer.json
rename to .devcontainer/cuda12.8-conda/devcontainer.json
index 3ed6fa9c37b..ad36130c6a0 100644
--- a/.devcontainer/cuda12.5-conda/devcontainer.json
+++ b/.devcontainer/cuda12.8-conda/devcontainer.json
@@ -3,7 +3,7 @@
     "context": "${localWorkspaceFolder}/.devcontainer",
     "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
     "args": {
-      "CUDA": "12.5",
+      "CUDA": "12.8",
       "PYTHON_PACKAGE_MANAGER": "conda",
       "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04"
     }
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
@@ -20,7 +20,7 @@
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
-  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"],
   "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
   "workspaceFolder": "/home/coder",
   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
@@ -29,7 +29,7 @@
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json
similarity index 88%
rename from .devcontainer/cuda12.5-pip/devcontainer.json
rename to .devcontainer/cuda12.8-pip/devcontainer.json
index fe402024e29..a2955b81a60 100644
--- a/.devcontainer/cuda12.5-pip/devcontainer.json
+++ b/.devcontainer/cuda12.8-pip/devcontainer.json
@@ -3,20 +3,20 @@
     "context": "${localWorkspaceFolder}/.devcontainer",
     "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
     "args": {
-      "CUDA": "12.5",
+      "CUDA": "12.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
     "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": {
-      "version": "12.5",
+      "version": "12.8",
       "installcuBLAS": true,
       "installcuSOLVER": true,
       "installcuRAND": true,
@@ -28,7 +28,7 @@
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
-  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"],
   "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
   "workspaceFolder": "/home/coder",
   "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
@@ -36,7 +36,7 @@
     "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 7348cf33c1d..b6a6156ecc1 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -38,7 +38,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -47,7 +47,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
@@ -69,7 +69,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-build-libcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -80,7 +80,7 @@ jobs:
   wheel-publish-libcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -91,7 +91,7 @@ jobs:
   wheel-build-pylibcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -101,7 +101,7 @@ jobs:
   wheel-publish-pylibcugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -112,7 +112,7 @@ jobs:
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -122,7 +122,7 @@ jobs:
   wheel-publish-cugraph:
     needs: wheel-build-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index e48f2e11acd..e4e09fbd279 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -31,7 +31,7 @@ jobs:
       - telemetry-setup
       - devcontainer
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0
     if: always()
     with:
       needs: ${{ toJSON(needs) }}
@@ -59,7 +59,7 @@ jobs:
   changed-files:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0
     with:
       files_yaml: |
         test_cpp:
@@ -90,28 +90,28 @@ jobs:
   checks:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0
     with:
       enable_check_generated_files: false
       ignored_pr_jobs: telemetry-summarize
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0
     with:
       build_type: pull-request
       node_type: cpu32
   conda-cpp-tests:
     needs: [conda-cpp-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
     with:
       build_type: pull-request
   conda-cpp-checks:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0
     with:
       build_type: pull-request
       enable_check_symbols: true
@@ -119,20 +119,20 @@ jobs:
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0
     with:
       build_type: pull-request
   conda-python-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
   conda-notebook-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
     with:
       build_type: pull-request
@@ -143,7 +143,7 @@ jobs:
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -153,7 +153,7 @@ jobs:
   wheel-build-libcugraph:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       # build for every combination of arch and CUDA version, but only for the latest Python
       matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
@@ -162,14 +162,14 @@ jobs:
   wheel-build-pylibcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       build_type: pull-request
       script: ci/build_wheel_pylibcugraph.sh
   wheel-tests-pylibcugraph:
     needs: [wheel-build-pylibcugraph, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
@@ -177,14 +177,14 @@ jobs:
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph.sh
   wheel-tests-cugraph:
     needs: [wheel-build-cugraph, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
@@ -192,10 +192,10 @@ jobs:
   devcontainer:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0
     with:
       arch: '["amd64"]'
-      cuda: '["12.5"]'
+      cuda: '["12.8"]'
       node_type: cpu32
       build_command: |
         sccache -z;
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 4aa698c987f..a5c1e9a823f 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -26,7 +26,7 @@ jobs:
       symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -34,7 +34,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -42,7 +42,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -51,7 +51,7 @@ jobs:
       script: ci/test_wheel_pylibcugraph.sh
   wheel-tests-cugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
index 01dd2436beb..07f0f83cc92 100644
--- a/.github/workflows/trigger-breaking-change-alert.yaml
+++ b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -12,7 +12,7 @@ jobs:
   trigger-notifier:
     if: contains(github.event.pull_request.labels.*.name, 'breaking')
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0
     with:
       sender_login: ${{ github.event.sender.login }}
       sender_avatar: ${{ github.event.sender.avatar_url }}
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
similarity index 96%
rename from conda/environments/all_cuda-125_arch-x86_64.yaml
rename to conda/environments/all_cuda-128_arch-x86_64.yaml
index 83126df9194..22c5f594a42 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -17,7 +17,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-profiler-api
 - cuda-python>=12.6.2,<13.0a0
-- cuda-version=12.5
+- cuda-version=12.8
 - cudf==25.2.*,>=0.0.0a0
 - cupy>=12.0.0
 - cxx-compiler
@@ -77,4 +77,4 @@ dependencies:
 - torchmetrics
 - ucx-py==0.42.*,>=0.0.0a0
 - wheel
-name: all_cuda-125_arch-x86_64
+name: all_cuda-128_arch-x86_64
diff --git a/dependencies.yaml b/dependencies.yaml
index 02fa03cff70..5419be9beda 100755
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,7 +3,7 @@ files:
   all:
     output: [conda]
     matrix:
-      cuda: ["11.8", "12.5"]
+      cuda: ["11.8", "12.8"]
       arch: [x86_64]
     includes:
       - checks
@@ -279,6 +279,10 @@ dependencies:
               cuda: "12.5"
             packages:
               - cuda-version=12.5
+          - matrix:
+              cuda: "12.8"
+            packages:
+              - cuda-version=12.8
   cuda:
     specific:
       - output_types: [conda]
@@ -300,7 +304,7 @@ dependencies:
               - cuda-nvtx
   common_build:
     common:
-      - output_types: [conda, pyproject]
+      - output_types: [conda, requirements, pyproject]
         packages:
           - &cmake_ver cmake>=3.26.4,!=3.30.0
           - ninja
diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml
index dfe3b085fdf..060d4ee1e99 100644
--- a/python/cugraph/pyproject.toml
+++ b/python/cugraph/pyproject.toml
@@ -72,7 +72,7 @@ build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
 cmake.version = "CMakeLists.txt"
 minimum-version = "build-system.requires"
-ninja.make-fallback = true
+ninja.make-fallback = false
 sdist.reproducible = true
 wheel.packages = ["cugraph"]
 
diff --git a/python/libcugraph/pyproject.toml b/python/libcugraph/pyproject.toml
index a6191e28000..9d85bfa5dac 100644
--- a/python/libcugraph/pyproject.toml
+++ b/python/libcugraph/pyproject.toml
@@ -53,14 +53,14 @@ select = [
 ]
 
 # detect when package size grows significantly
-max_allowed_size_compressed = '1.2G'
+max_allowed_size_compressed = '1.4G'
 
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
 cmake.version = "CMakeLists.txt"
 minimum-version = "build-system.requires"
-ninja.make-fallback = true
+ninja.make-fallback = false
 sdist.reproducible = true
 wheel.packages = ["libcugraph"]
 wheel.install-dir = "libcugraph"
diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml
index ac124e1fd5f..3c50a79bfa3 100644
--- a/python/pylibcugraph/pyproject.toml
+++ b/python/pylibcugraph/pyproject.toml
@@ -57,7 +57,7 @@ build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
 cmake.version = "CMakeLists.txt"
 minimum-version = "build-system.requires"
-ninja.make-fallback = true
+ninja.make-fallback = false
 sdist.reproducible = true
 wheel.packages = ["pylibcugraph"]
 

From dc435cfcc9636f02f22ce81362c4ccf617a6d12d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 31 Jan 2025 05:27:41 -0800
Subject: [PATCH 9/9] Revert CUDA 12.8 shared workflow branch changes (#4906)

This PR points the `rapidsai/shared-workflows` references in the GitHub Actions workflows back to the default `branch-25.02` ref, replacing the `cuda-12.8.0` ref.

xref: https://github.com/rapidsai/build-planning/issues/139

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cugraph/pull/4906
---
 .github/workflows/build.yaml                  | 20 ++++++------
 .github/workflows/pr.yaml                     | 32 +++++++++----------
 .github/workflows/test.yaml                   | 10 +++---
 .../trigger-breaking-change-alert.yaml        |  2 +-
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index b6a6156ecc1..7348cf33c1d 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -38,7 +38,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -47,7 +47,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
@@ -69,7 +69,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-build-libcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -80,7 +80,7 @@ jobs:
   wheel-publish-libcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -91,7 +91,7 @@ jobs:
   wheel-build-pylibcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -101,7 +101,7 @@ jobs:
   wheel-publish-pylibcugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -112,7 +112,7 @@ jobs:
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -122,7 +122,7 @@ jobs:
   wheel-publish-cugraph:
     needs: wheel-build-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index e4e09fbd279..8e3134b896e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -31,7 +31,7 @@ jobs:
       - telemetry-setup
       - devcontainer
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02
     if: always()
     with:
       needs: ${{ toJSON(needs) }}
@@ -59,7 +59,7 @@ jobs:
   changed-files:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02
     with:
       files_yaml: |
         test_cpp:
@@ -90,28 +90,28 @@ jobs:
   checks:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02
     with:
       enable_check_generated_files: false
       ignored_pr_jobs: telemetry-summarize
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02
     with:
       build_type: pull-request
       node_type: cpu32
   conda-cpp-tests:
     needs: [conda-cpp-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
     with:
       build_type: pull-request
   conda-cpp-checks:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02
     with:
       build_type: pull-request
       enable_check_symbols: true
@@ -119,20 +119,20 @@ jobs:
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02
     with:
       build_type: pull-request
   conda-python-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
   conda-notebook-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
     with:
       build_type: pull-request
@@ -143,7 +143,7 @@ jobs:
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -153,7 +153,7 @@ jobs:
   wheel-build-libcugraph:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       # build for every combination of arch and CUDA version, but only for the latest Python
       matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
@@ -162,14 +162,14 @@ jobs:
   wheel-build-pylibcugraph:
     needs: wheel-build-libcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: pull-request
       script: ci/build_wheel_pylibcugraph.sh
   wheel-tests-pylibcugraph:
     needs: [wheel-build-pylibcugraph, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
@@ -177,14 +177,14 @@ jobs:
   wheel-build-cugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph.sh
   wheel-tests-cugraph:
     needs: [wheel-build-cugraph, changed-files]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
@@ -192,7 +192,7 @@ jobs:
   devcontainer:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
     with:
       arch: '["amd64"]'
       cuda: '["12.8"]'
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index a5c1e9a823f..4aa698c987f 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -26,7 +26,7 @@ jobs:
       symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -34,7 +34,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -42,7 +42,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -51,7 +51,7 @@ jobs:
       script: ci/test_wheel_pylibcugraph.sh
   wheel-tests-cugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
index 07f0f83cc92..01dd2436beb 100644
--- a/.github/workflows/trigger-breaking-change-alert.yaml
+++ b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -12,7 +12,7 @@ jobs:
   trigger-notifier:
     if: contains(github.event.pull_request.labels.*.name, 'breaking')
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda-12.8.0
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
     with:
       sender_login: ${{ github.event.sender.login }}
       sender_avatar: ${{ github.event.sender.avatar_url }}